<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing with OASIS Tables v3.0 20080202//EN" "journalpub-oasis3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:oasis="http://docs.oasis-open.org/ns/oasis-exchange/table" xml:lang="en" dtd-version="3.0" article-type="data-paper">
  <front>
    <journal-meta><journal-id journal-id-type="publisher">ESSD</journal-id><journal-title-group>
    <journal-title>Earth System Science Data</journal-title>
    <abbrev-journal-title abbrev-type="publisher">ESSD</abbrev-journal-title><abbrev-journal-title abbrev-type="nlm-ta">Earth Syst. Sci. Data</abbrev-journal-title>
  </journal-title-group><issn pub-type="epub">1866-3516</issn><publisher>
    <publisher-name>Copernicus Publications</publisher-name>
    <publisher-loc>Göttingen, Germany</publisher-loc>
  </publisher></journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.5194/essd-15-2055-2023</article-id><title-group><article-title>Generation of global 1 km daily soil moisture product from 2000 to 2020
using ensemble learning</article-title><alt-title>Generation of global 1 km daily soil moisture product</alt-title>
      </title-group><?xmltex \runningtitle{Generation of global 1\,km daily soil moisture product}?><?xmltex \runningauthor{Y. Zhang et al.}?>
      <contrib-group>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Zhang</surname><given-names>Yufang</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="yes" rid="aff2">
          <name><surname>Liang</surname><given-names>Shunlin</given-names></name>
          <email>shunlin@hku.hk</email>
        <ext-link>https://orcid.org/0000-0003-2708-9183</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Ma</surname><given-names>Han</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-1123-7447</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>He</surname><given-names>Tao</given-names></name>
          
        <ext-link>https://orcid.org/0000-0003-2079-7988</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff3">
          <name><surname>Wang</surname><given-names>Qian</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-7697-5168</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Li</surname><given-names>Bing</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Xu</surname><given-names>Jianglei</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-8147-400X</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Zhang</surname><given-names>Guodong</given-names></name>
          
        <ext-link>https://orcid.org/0000-0001-8822-7725</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Liu</surname><given-names>Xiaobang</given-names></name>
          
        <ext-link>https://orcid.org/0000-0002-1285-7840</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff1">
          <name><surname>Xiong</surname><given-names>Changhao</given-names></name>
          
        </contrib>
        <aff id="aff1"><label>1</label><institution>School of Remote Sensing and Information Engineering, Wuhan
University, Wuhan 430079, China</institution>
        </aff>
        <aff id="aff2"><label>2</label><institution>Department of Geography, The University of Hong Kong, Hong Kong
999077, China</institution>
        </aff>
        <aff id="aff3"><label>3</label><institution>State Key Laboratory of Remote Sensing Science, Beijing Normal
University, Beijing 100875, China</institution>
        </aff>
        <aff id="aff4"><label>4</label><institution>Key Research Institute of Yellow River Civilization and Sustainable
Development and Collaborative Innovation Center on Yellow River
Civilization of Henan Province, Henan University, Kaifeng 475001, China</institution>
        </aff>
      </contrib-group>
      <author-notes><corresp id="corr1">Shunlin Liang (shunlin@hku.hk)</corresp></author-notes><pub-date><day>23</day><month>May</month><year>2023</year></pub-date>
      
      <volume>15</volume>
      <issue>5</issue>
      <fpage>2055</fpage><lpage>2079</lpage>
      <history>
        <date date-type="received"><day>10</day><month>October</month><year>2022</year></date>
           <date date-type="rev-request"><day>13</day><month>January</month><year>2023</year></date>
           <date date-type="rev-recd"><day>18</day><month>April</month><year>2023</year></date>
           <date date-type="accepted"><day>20</day><month>April</month><year>2023</year></date>
      </history>
      <permissions>
        <copyright-statement>Copyright: © 2023 Yufang Zhang et al.</copyright-statement>
        <copyright-year>2023</copyright-year>
      <license license-type="open-access"><license-p>This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this licence, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link></license-p></license></permissions><self-uri xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023.html">This article is available from https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023.html</self-uri><self-uri xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023.pdf">The full text article is available as a PDF file from https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023.pdf</self-uri>
      <abstract><title>Abstract</title>

      <p id="d1e184">Motivated by the lack of long-term global soil moisture products
with both high spatial and temporal resolutions, a global 1 km daily
spatiotemporally continuous soil moisture product (GLASS SM) was generated
from 2000 to 2020 using an ensemble learning model (eXtreme Gradient
Boosting – XGBoost). The model was developed by integrating multiple
datasets, including albedo, land surface temperature, and leaf area index
products from the Global Land Surface Satellite (GLASS) product suite, as
well as the European reanalysis (ERA5-Land) soil moisture product, in situ
soil moisture dataset from the International Soil Moisture Network (ISMN),
and auxiliary datasets (Multi-Error-Removed Improved-Terrain (MERIT) DEM and
Global gridded soil information (SoilGrids)). Given the relatively large differences in scale between point-scale
in situ measurements and other datasets, the triple collocation (TC) method
was adopted to select the representative soil moisture stations and their
measurements for creating the training samples. To fully evaluate the model
performance, three validation strategies were explored: random,
site independent, and year independent. Results showed that although the
XGBoost model achieved the highest accuracy on the random test samples, it
was clearly a result of model overfitting. Meanwhile, training the model
with representative stations selected by the TC method could considerably
improve its performance for site- or year-independent test samples. The
overall validation accuracy of the model trained using representative
stations on the site-independent test samples, which was least likely to be
overfitted, was a correlation coefficient (<inline-formula><mml:math id="M1" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula>) of 0.715 and root mean square
error (RMSE) of 0.079 m<inline-formula><mml:math id="M2" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M3" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. Moreover, compared to the model
developed without station filtering, the validation accuracies of the model
trained with representative stations improved significantly for most stations,
with the median <inline-formula><mml:math id="M4" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and unbiased RMSE (ubRMSE) of the model for each station
increasing from 0.64 to 0.74 and decreasing from 0.055 to 0.052 m<inline-formula><mml:math id="M5" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M6" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. Further validation of the GLASS SM product across
four independent soil moisture networks revealed its ability to capture the
temporal dynamics of measured soil moisture (<inline-formula><mml:math id="M7" display="inline"><mml:mrow><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.69</mml:mn></mml:mrow></mml:math></inline-formula>–0.89; ubRMSE <inline-formula><mml:math id="M8" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula>
0.033–0.048 m<inline-formula><mml:math id="M9" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M10" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>). Lastly, the intercomparison between the
GLASS SM product and two global microwave soil moisture datasets – the 1 km
Soil Moisture Active Passive/Sentinel-1 L2 Radiometer/Radar soil moisture
product and the European Space Agency Climate Change Initiative combined
soil moisture product at 0.25<inline-formula><mml:math id="M11" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>  – indicated that the derived
product maintained a more complete spatial coverage and exhibited high
spatiotemporal consistency with those two soil moisture products. The annual
average GLASS SM dataset from 2000 to 2020 can be freely downloaded from
<ext-link xlink:href="https://doi.org/10.5281/zenodo.7172664" ext-link-type="DOI">10.5281/zenodo.7172664</ext-link>  (Zhang et al., 2022a),
and the complete product at daily scale is available at
<uri>http://glass.umd.edu/soil_moisture/</uri> (last access: 12 May 2023).</p>
  </abstract>
    
<funding-group>
<award-group id="gs1">
<funding-source>National Natural Science Foundation of China</funding-source>
<award-id>42090011</award-id>
</award-group>
</funding-group>
</article-meta>
  </front>
<body>
      

<?pagebreak page2056?><sec id="Ch1.S1" sec-type="intro">
  <label>1</label><title>Introduction</title>
      <p id="d1e309">Soil moisture typically refers to the water content of the unsaturated soil
zone  (Liang and Wang, 2020). As an essential climate variable
specified by the Global Climate Observing System, it plays a critical role
in terrestrial water, energy, and carbon cycles
(Dorigo et
al., 2017; Humphrey et al., 2021). Over recent decades, soil moisture
datasets have been used across a wide range of earth system applications,
including climate-related research   (Berg and Sheffield,
2018), hydrological modeling   (Brocca et al., 2017),
rainfall estimation  (Brocca et al., 2019), disaster
forecasting   (Kim et al., 2019), and
agriculture and ecosystem monitoring
(Liu
et al., 2020; Holzman et al., 2014), which is mainly attributable to the progress in
remotely sensed soil moisture algorithms. However, substantial gaps remain
between the currently released soil moisture products and the growing
requirements of various applications, especially at regional and local
scales     (Peng et al., 2021).</p>
      <p id="d1e312">Global soil moisture products can generally be obtained through model
simulations or remote sensing, mostly at spatial resolutions of tens of
kilometers. The advantages of simulated or reanalysis soil moisture
datasets, such as the land component of the European ReAnalysis V5
(ERA5-Land) and the Global Land Data Assimilation System (GLDAS) soil
moisture products (Rodell et al.,
2004; Muñoz-Sabater et al., 2021), are their spatiotemporal continuity
and availability of root-zone estimates; however, their corresponding errors
can be rather large when the quality of forcing datasets or model
performance is relatively poor
(Sheffield et al., 2004). Alternatively,
microwave remote sensing has been regarded as the most promising technique
to acquire surface soil moisture estimates at global scale, due to its
high sensitivity to soil water content dynamics and its capacity for
all-weather monitoring (Babaeian et al.,
2019; Shi et al., 2019). Currently, several global soil moisture products
have been operationally generated from microwave scatterometers and
radiometers, including the Advanced Scatterometer (ASCAT) and Advanced
Microwave Scanning Radiometer for Earth Observing System (AMSR-E), in
addition to instruments aboard the Soil Moisture and Ocean Salinity (SMOS)
and Soil Moisture Active Passive (SMAP) satellites
(Chan
et al., 2016; Wagner et al., 2013; Njoku et al., 2003; Kerr et al., 2016),
typically with a grid spacing of 9–50 km and a revisit cycle of 1–3 d.</p>
      <p id="d1e315">Motivated by the lack of high spatial resolution soil moisture products
that are capable of benefiting numerous regional-scale applications
(Peng et al., 2021), various algorithms
have been proposed to downscale the coarser global soil moisture
products mentioned above (Peng et al., 2017), some of
of which have been used to derive global or regional soil moisture products
at fine scales in recent years. For example, by combining a Sentinel-1 synthetic
aperture radar (SAR) dataset with the SMAP radiometer dataset, Das et al. (2019) generated
global soil moisture products at 3  and 1 km resolutions. Song et al. (2022) downscaled the
AMSR-E/AMSR-2 soil moisture products using optical reflectance from the
Moderate Resolution Imaging Spectroradiometer (MODIS) and gap-filled land
surface temperature (LST) datasets, producing a 1 km daily soil moisture
product over China under all-weather conditions. Elsewhere, Naz et al. (2020) generated a daily soil moisture
reanalysis dataset (ESSMRA) at 3 km resolution over Europe by assimilating
the European Space Agency (ESA) Climate Change Initiative (CCI) product into
a community land model via an ensemble Kalman filter method. Additionally,
Vergopolan et al. (2021) recently released a 30 m subdaily soil moisture dataset across the conterminous United States
(CONUS), which was retrieved using the merged 30 m brightness temperatures
obtained by combining a hyper-resolution land surface model (HydroBlocks), a
radiative transfer model, and the SMAP Enhanced Level 3 brightness
temperatures at 9 km. Apart from these downscaled high-resolution datasets,
several studies have directly derived the 1 km operational soil moisture
products over Europe from multitemporal Sentinel-1 SAR images using change
detection algorithms, showing potential for global coverage
(Balenzano et al., 2021;
Bauer-Marschallinger et al., 2019).</p>
      <p id="d1e318">Table 1 lists the spatial and temporal coverages, temporal resolution, and
grid spacing (i.e., pixel size, which may be finer than the actual spatial
resolution) of several representative and publicly available soil moisture
products. As Table 1 shows, there remains a lack of long-term global soil
moisture products at both high spatial and temporal resolutions. Although
the SMAP/Sentinel-1 L2 Radiometer/Radar soil moisture dataset
(SPL2SMAP_S) has global coverage and a spatial resolution up
to 1 km, its temporal resolution degrades to 12 d over most regions, owing
to the relatively long revisit cycle of Sentinel-1 SAR satellites. Recently,
Zheng et al. (2023) developed a global
seamless soil moisture dataset by downscaling the 0.25<inline-formula><mml:math id="M12" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> ESA CCI
product using a random forest model, achieving an <inline-formula><mml:math id="M13" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> of 0.89 and unbiased RMSE (ubRMSE) of
0.045 m<inline-formula><mml:math id="M14" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M15" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, but they only adopted a random cross-validation
strategy, which is likely to be affected by model overfitting. Other
downscaled soil moisture datasets generally maintain regional or continental
coverage, limited by the lack of high-resolution seamless input datasets or
model applicability. Optical and thermal remote sensing techniques can
provide long-term observations with high spatiotemporal resolutions, which
have been widely used to derive soil moisture or relevant indices
(Yue
et al., 2019; Ghulam et al., 2007; Rahimzadeh-Bajgiran et al., 2013).
However, optical and thermal satellite datasets can be detrimentally
affected by cloud coverage, hindering their use in soil moisture retrieval
or downscaling across a global scale. To address this issue, the latest
versions of the Global<?pagebreak page2057?> Land Surface Satellite (GLASS) products
(Liang et al., 2021) were used here, including the
spatiotemporally continuous surface albedo, leaf area index (LAI), and land
surface temperature (LST), which were produced with reliable accuracies
primarily based on MODIS observations. In the present study, these
fine-scale GLASS products were integrated with auxiliary datasets (terrain
and soil texture) and the seamless ERA5-Land reanalysis soil moisture
product at a coarse scale using an ensemble machine learning model to
estimate daily soil moisture at 1 km resolution. This framework was adapted
from Zhang et al. (2022b), where
models were trained using Landsat 8 observations and multisource datasets
as inputs, and with the International Soil Moisture Network (ISMN) measurements
as the target. To produce a seamless global soil moisture product, Landsat
datasets prone to cloud interference were replaced with spatiotemporally
continuous GLASS products. Considering the larger scale difference between
GLASS products and in situ soil moisture than between Landsat datasets and in situ soil moisture, the
triple collocation (TC) technique
(Stoffelen, 1998; McColl
et al., 2014) was adopted to select the representative soil moisture
stations prior to model training for mitigating the influence of scale
mismatch on prediction accuracy.</p>
      <p id="d1e359">Specifically, the aim of this research was to provide a long-term
(2000–2020) global soil moisture dataset (GLASS SM) with high
spatiotemporal resolutions (1 km, daily) and reliable accuracy. To achieve
this goal, an ensemble learning model, eXtreme Gradient Boosting (XGBoost)
(Friedman, 2001; Chen and Guestrin, 2016), was developed
by integrating multisource datasets. The model was then applied to generate
the global 1 km GLASS SM product, which was further evaluated against four
independent soil moisture networks. Lastly, an intercomparison was made
between the derived product and two global microwave soil moisture products
to investigate their spatiotemporal consistency.</p>

<?xmltex \floatpos{p}?><table-wrap id="Ch1.T1" specific-use="star" orientation="landscape"><?xmltex \currentcnt{1}?><label>Table 1</label><caption><p id="d1e365">Main characteristics of several representative and publicly
available soil moisture products.</p></caption><oasis:table frame="topbot"><?xmltex \begin{scaleboxenv}{.80}[.80]?><oasis:tgroup cols="9">
     <oasis:colspec colnum="1" colname="col1" align="justify" colwidth="2cm"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="3.2cm"/>
     <oasis:colspec colnum="3" colname="col3" align="justify" colwidth="2cm"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:colspec colnum="6" colname="col6" align="justify" colwidth="2cm"/>
     <oasis:colspec colnum="7" colname="col7" align="left"/>
     <oasis:colspec colnum="8" colname="col8" align="justify" colwidth="6cm"/>
     <oasis:colspec colnum="9" colname="col9" align="left"/>
     <oasis:thead>
       <oasis:row>

         <oasis:entry colname="col1">Category</oasis:entry>

         <oasis:entry colname="col2">Soil moisture</oasis:entry>

         <oasis:entry colname="col3">Grid</oasis:entry>

         <oasis:entry colname="col4">Spatial</oasis:entry>

         <oasis:entry colname="col5">Temporal</oasis:entry>

         <oasis:entry colname="col6">Temporal</oasis:entry>

         <oasis:entry colname="col7">References</oasis:entry>

         <oasis:entry colname="col8">Data</oasis:entry>

         <oasis:entry colname="col9">Notes</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col1"/>

         <oasis:entry colname="col2">products</oasis:entry>

         <oasis:entry colname="col3">spacing</oasis:entry>

         <oasis:entry colname="col4">coverage</oasis:entry>

         <oasis:entry colname="col5">resolution</oasis:entry>

         <oasis:entry colname="col6">coverage</oasis:entry>

         <oasis:entry colname="col7"/>

         <oasis:entry colname="col8">link</oasis:entry>

         <oasis:entry colname="col9"/>

       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>

         <?xmltex \mrwidth{2cm}?><oasis:entry rowsep="1" colname="col1" morerows="5">Downscaled products</oasis:entry>

         <oasis:entry colname="col2">SPL2SMAP_S</oasis:entry>

         <oasis:entry colname="col3">1/3 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">6–12 d</oasis:entry>

         <oasis:entry colname="col6">2015–present</oasis:entry>

         <oasis:entry colname="col7">Das et al. (2019)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://nsidc.org/data/spl2smap_s</uri><inline-formula><mml:math id="M17" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">Downscaled ESA-CCI SSM</oasis:entry>

         <oasis:entry colname="col3">1 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2000–2020</oasis:entry>

         <oasis:entry colname="col7">Zheng et al. (2023)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.11888/RemoteSen.tpdc.272760</uri></oasis:entry>

         <oasis:entry colname="col9">Seamless</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">Downscaled AMSR SM</oasis:entry>

         <oasis:entry colname="col3">1 km</oasis:entry>

         <oasis:entry colname="col4">China</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2003–2019</oasis:entry>

         <oasis:entry colname="col7">Song et al. (2022)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.11888/Hydro.tpdc.271762</uri></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">Downscaled ASCAT SM</oasis:entry>

         <oasis:entry colname="col3">1 km</oasis:entry>

         <oasis:entry colname="col4">Europe</oasis:entry>

         <oasis:entry colname="col5">1.5 d</oasis:entry>

         <oasis:entry colname="col6">2007–present</oasis:entry>

         <oasis:entry colname="col7">Wagner et al.   (2008)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://hsaf.meteoam.it/</uri><inline-formula><mml:math id="M18" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">ESSMRA</oasis:entry>

         <oasis:entry colname="col3">3 km</oasis:entry>

         <oasis:entry colname="col4">Europe</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2000–2015</oasis:entry>

         <oasis:entry colname="col7">Naz et al. (2020)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.1594/PANGAEA.907036</uri></oasis:entry>

         <oasis:entry colname="col9">Seamless</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2">SMAP-HydroBlocks</oasis:entry>

         <oasis:entry colname="col3">30 m</oasis:entry>

         <oasis:entry colname="col4">CONUS</oasis:entry>

         <oasis:entry colname="col5">6 h</oasis:entry>

         <oasis:entry colname="col6">2015–2019</oasis:entry>

         <oasis:entry colname="col7">Vergopolan et al. (2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.5281/zenodo.5206725</uri></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <?xmltex \mrwidth{2cm}?><oasis:entry rowsep="1" colname="col1" morerows="11">Microwave remote sensing products</oasis:entry>

         <oasis:entry colname="col2">Sentinel-1</oasis:entry>

         <oasis:entry colname="col3">1 km</oasis:entry>

         <oasis:entry colname="col4">Southern Italy</oasis:entry>

         <oasis:entry colname="col5">6–12 d</oasis:entry>

         <oasis:entry colname="col6">2015–2018</oasis:entry>

         <oasis:entry colname="col7">Balenzano et al. (2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.5281/zenodo.5006307</uri></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">CGLS Sentinel-1 SSM</oasis:entry>

         <oasis:entry colname="col3">1 km</oasis:entry>

         <oasis:entry colname="col4">Europe</oasis:entry>

         <oasis:entry colname="col5">1.5–8 d</oasis:entry>

         <oasis:entry colname="col6">2014–present</oasis:entry>

         <oasis:entry colname="col7">Bauer-Marschallinger et al.  (2019)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://land.copernicus.eu/global/products/ssm</uri><inline-formula><mml:math id="M19" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">ASCAT</oasis:entry>

         <oasis:entry colname="col3">12.5/25 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2007–present</oasis:entry>

         <oasis:entry colname="col7">Bartalis et al.   (2007)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://hsaf.meteoam.it/</uri><inline-formula><mml:math id="M20" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">AMSR-E/AMSR2</oasis:entry>

         <oasis:entry colname="col3">25 km <?xmltex \hack{\hfill\break}?>10/25 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2002–2011 <?xmltex \hack{\hfill\break}?>2012–present</oasis:entry>

         <oasis:entry colname="col7">Owe et al. (2008)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://search.earthdata.nasa.gov/search</uri><inline-formula><mml:math id="M21" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">Fengyun-3</oasis:entry>

         <oasis:entry colname="col3">25 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2011–2020</oasis:entry>

         <oasis:entry colname="col7">Yang et al.  (2012)</oasis:entry>

         <oasis:entry colname="col8"><uri>http://satellite.nsmc.org.cn/</uri><inline-formula><mml:math id="M22" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">SMAP-L3</oasis:entry>

         <oasis:entry colname="col3">36 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2015–present</oasis:entry>

         <oasis:entry colname="col7">O'Neill et al.   (2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://nsidc.org/data/SPL3SMP/versions/8</uri><inline-formula><mml:math id="M23" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">SMAP-IB</oasis:entry>

         <oasis:entry colname="col3">36 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2015–2021</oasis:entry>

         <oasis:entry colname="col7">Li et al. (2022)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://ib.remote-sensing.inrae.fr/</uri><inline-formula><mml:math id="M24" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">SMOS CATDS Level 3</oasis:entry>

         <oasis:entry colname="col3">25 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2010–present</oasis:entry>

         <oasis:entry colname="col7">Al Bitar et al.  (2017)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://www.catds.fr/sipad/</uri><inline-formula><mml:math id="M25" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">SMOS-IC</oasis:entry>

         <oasis:entry colname="col3">25 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2010–2021</oasis:entry>

         <oasis:entry colname="col7">Wigneron et al. (2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://ib.remote-sensing.inrae.fr/</uri><inline-formula><mml:math id="M26" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">SGD-SM</oasis:entry>

         <oasis:entry colname="col3">0.25<inline-formula><mml:math id="M27" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2013–2019</oasis:entry>

         <oasis:entry colname="col7">Zhang et al.  (2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.5281/zenodo.4417458</uri></oasis:entry>

         <oasis:entry colname="col9">Seamless</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col2">MCCA-AMSR <?xmltex \hack{\hfill\break}?>MCCA-SMAP</oasis:entry>

         <oasis:entry colname="col3">0.25<inline-formula><mml:math id="M28" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> <?xmltex \hack{\hfill\break}?>36 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2002–2021 <?xmltex \hack{\hfill\break}?>2015–2022</oasis:entry>

         <oasis:entry colname="col7">Zhao et al.  (2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://doi.org/10.11888/Terre.tpdc.272907</uri> <?xmltex \hack{\hfill\break}?> <uri>https://doi.org/10.11888/Terre.tpdc.272088</uri></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2">ESA CCI</oasis:entry>

         <oasis:entry colname="col3">0.25<inline-formula><mml:math id="M29" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">1978–2021</oasis:entry>

         <oasis:entry colname="col7">Gruber et al.  (2019)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://esa-soilmoisture-cci.org/data</uri><inline-formula><mml:math id="M30" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">–</oasis:entry>

       </oasis:row>
       <oasis:row>

         <?xmltex \mrwidth{2cm}?><oasis:entry rowsep="1" colname="col1" morerows="1">Reanalysis products</oasis:entry>

         <oasis:entry colname="col2">GLDAS-Noah</oasis:entry>

         <oasis:entry colname="col3">0.25<inline-formula><mml:math id="M31" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">3 h</oasis:entry>

         <oasis:entry colname="col6">2000–2021</oasis:entry>

         <oasis:entry colname="col7">Beaudoing and Rodell   (2020)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://hydro1.gesdisc.eosdis.nasa.gov/data/GLDAS/GLDAS_NOAH025_3H.2.1/</uri><inline-formula><mml:math id="M32" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">Seamless</oasis:entry>

       </oasis:row>
       <oasis:row rowsep="1">

         <oasis:entry colname="col2">ERA5-Land</oasis:entry>

         <oasis:entry colname="col3">0.1<inline-formula><mml:math id="M33" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Hourly</oasis:entry>

         <oasis:entry colname="col6">1950–present</oasis:entry>

         <oasis:entry colname="col7">Muñoz-Sabater   (2019, 2021)</oasis:entry>

         <oasis:entry colname="col8"><uri>https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land</uri><inline-formula><mml:math id="M34" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">Seamless</oasis:entry>

       </oasis:row>
       <oasis:row>

         <oasis:entry colname="col1">Present study</oasis:entry>

         <oasis:entry colname="col2">GLASS SM</oasis:entry>

         <oasis:entry colname="col3">1 km</oasis:entry>

         <oasis:entry colname="col4">Global</oasis:entry>

         <oasis:entry colname="col5">Daily</oasis:entry>

         <oasis:entry colname="col6">2000–2020</oasis:entry>

         <oasis:entry colname="col7">–</oasis:entry>

         <oasis:entry colname="col8"><uri>http://glass.umd.edu/soil_moisture/</uri><inline-formula><mml:math id="M35" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>

         <oasis:entry colname="col9">Seamless</oasis:entry>

       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup><?xmltex \end{scaleboxenv}?></oasis:table><?xmltex \begin{scaleboxenv}{.80}[.80]?><table-wrap-foot><p id="d1e368"><inline-formula><mml:math id="M16" display="inline"><mml:msup><mml:mi/><mml:mo>*</mml:mo></mml:msup></mml:math></inline-formula> Last access: 12 May 2023. SSM represents surface soil moisture. CGLS represents the Copernicus Global Land Service.</p></table-wrap-foot><?xmltex \end{scaleboxenv}?><?xmltex \gdef\@currentlabel{1}?></table-wrap>

</sec>
<sec id="Ch1.S2">
  <label>2</label><title>Datasets</title>
      <p id="d1e1269">The multisource datasets used to generate the global high-resolution soil
moisture product here can be grouped into four categories (Table 2). Namely,
remotely sensed variables derived from the three GLASS products, reanalysis
surface soil moisture from the ERA5-Land dataset, and auxiliary variables
extracted from the Multi-Error-Removed Improved-Terrain (MERIT) DEM and
SoilGrids products were used to train an XGBoost model for estimating the
global soil moisture product, whereas globally distributed in situ soil
moisture measurements from ISMN stations were used as targets for model
training. In addition, four independent in situ soil moisture datasets and
two microwave soil moisture products were used to validate and compare the
derived global product.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T2" specific-use="star"><?xmltex \currentcnt{2}?><label>Table 2</label><caption><p id="d1e1275">Multisource datasets used to generate the global high-resolution
soil moisture product.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="3cm"/>
     <oasis:colspec colnum="3" colname="col3" align="justify" colwidth="3cm"/>
     <oasis:colspec colnum="4" colname="col4" align="justify" colwidth="3cm"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Category</oasis:entry>
         <oasis:entry colname="col2">Dataset</oasis:entry>
         <oasis:entry colname="col3">Spatial resolution</oasis:entry>
         <oasis:entry colname="col4">Temporal resolution</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Satellite products</oasis:entry>
         <oasis:entry colname="col2">GLASS albedo <?xmltex \hack{\hfill\break}?>GLASS LST <?xmltex \hack{\hfill\break}?>GLASS LAI</oasis:entry>
         <oasis:entry colname="col3">500 m <?xmltex \hack{\hfill\break}?>1 km <?xmltex \hack{\hfill\break}?>500 m</oasis:entry>
         <oasis:entry colname="col4">4 d <?xmltex \hack{\hfill\break}?>Daily <?xmltex \hack{\hfill\break}?>8 d</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Reanalysis product</oasis:entry>
         <oasis:entry colname="col2">ERA5-Land SSM</oasis:entry>
         <oasis:entry colname="col3">0.1<inline-formula><mml:math id="M36" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col4">Hourly</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Auxiliary datasets</oasis:entry>
         <oasis:entry colname="col2">MERIT DEM <?xmltex \hack{\hfill\break}?>SoilGrids 2.0</oasis:entry>
         <oasis:entry colname="col3">90 m <?xmltex \hack{\hfill\break}?>250 m</oasis:entry>
         <oasis:entry colname="col4">– <?xmltex \hack{\hfill\break}?>–</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Ground-based data</oasis:entry>
         <oasis:entry colname="col2">ISMN SSM</oasis:entry>
         <oasis:entry colname="col3">Point scale</oasis:entry>
         <oasis:entry colname="col4">Hourly</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><?xmltex \gdef\@currentlabel{2}?></table-wrap>

<?xmltex \hack{\newpage}?>
<sec id="Ch1.S2.SS1">
  <label>2.1</label><title>Remotely sensed datasets</title>
      <p id="d1e1409">The GLASS product suite has been employed in various applications owing to
its long-term coverage, spatial continuity, high spatial resolution, and
accuracy  (Liang et al., 2021). Here, the latest version of
GLASS albedo, LST, and LAI products served as the primary inputs to the
ensemble learning model. Specifically, the GLASS V6 LAI product (500 m
resolution) was generated from six MODIS 8 d surface reflectance bands of
MOD09A1 using a bidirectional long short-term memory deep learning model
(<uri>http://www.glass.umd.edu</uri>, last access: 12 May 2023)
(Ma and Liang,
2022). Notably, this product is relatively more accurate than the 250 m
GLASS LAI estimated from two bands of MOD09Q1. The all-sky 1 km GLASS LST
was produced by integrating multiple datasets from MODIS, reanalysis, and in
situ LST measurements using a random forest model
(Li et al.,
2021). Daily global LSTs averaged from instantaneous GLASS LST products were
used here, which will also be released at <uri>http://www.glass.umd.edu</uri>.
The gap-free GLASS albedo products were generated using a combination of a
direct-estimation algorithm  (Qu et al., 2014) and a
spatiotemporal filtering scheme  (Liu et al., 2013). Namely,
the black-sky visible, near-infrared, and shortwave albedo data extracted from
the GLASS V42 albedo products were used in the present study (<uri>http://www.glass.umd.edu</uri>).</p>
</sec>
<sec id="Ch1.S2.SS2">
  <label>2.2</label><title>ERA5-Land reanalysis soil moisture product</title>
      <p id="d1e1429">ERA5 provides a range of global atmospheric, terrestrial, and oceanic
variables from 1950 to present at 31 km spatial resolution
(Hersbach et al., 2020). Specifically,
ERA5-Land is an enhanced global land reanalysis dataset obtained by
downscaling the atmospheric forcing derived from the reanalysis of ERA5 to a
native resolution of approximately 9 km  (Muñoz-Sabater
et al., 2021). ERA5-Land includes hourly estimates of volumetric soil
moisture at four soil layers and a grid spacing of 0.1<inline-formula><mml:math id="M37" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>
(<uri>https://cds.climate.copernicus.eu/</uri>, last access: 12 May 2023). In the present study, the
top layer (0–7 cm) ERA5-Land soil moisture data were used to match the
shallow observation depths of optical satellites. The daily average soil
moisture was calculated and resampled to 1 km before being used as an input
variable of the ensemble learning model.</p>
</sec>
<sec id="Ch1.S2.SS3">
  <label>2.3</label><title>Static terrain and soil texture datasets</title>
      <?pagebreak page2059?><p id="d1e1452">Topography and soil properties, which can be treated as static variables due
to their relatively slow rate of change over the short term, have an
important influence on the spatial variations of soil moisture at finer
scales. The global terrain dataset used in the study here was the
high-accuracy MERIT DEM with a spatial resolution of 3 arcsec
(<inline-formula><mml:math id="M38" display="inline"><mml:mo lspace="0mm">∼</mml:mo></mml:math></inline-formula> 90 m at the Equator). The MERIT DEM integrates several
spaceborne DEMs after eliminating their inherent primary error components,
including speckle noise, stripe noise, absolute bias, and tree height bias
(<uri>http://hydro.iis.u-tokyo.ac.jp/~yamadai/MERIT_DEM/</uri>, last access: 12 May 2023)  (Yamazaki et al., 2017).
After deriving the elevation, aspect, and slope from the MERIT DEM, these
topographic variables were resampled to 1 km and used as input features for
the model. In addition, soil texture was derived from the SoilGrids V2.0
product at 250 m resolution (<uri>https://www.isric.org/explore/soilgrids</uri>, last access: 12 May 2023). SoilGrids uses <inline-formula><mml:math id="M39" display="inline"><mml:mo>&gt;</mml:mo></mml:math></inline-formula> 240 000 soil profile measurements and <inline-formula><mml:math id="M40" display="inline"><mml:mo>&gt;</mml:mo></mml:math></inline-formula> 400 environmental
covariates worldwide to train machine learning models and produce global
soil property maps across six depth intervals  (Poggio et al.,
2021). Recent studies have shown that the SoilGrids product has both higher
resolution and enhanced accuracy compared to other soil datasets at the global
scale  (Dai et al., 2019), in addition to the ability of soil
texture data to improve the bias and root mean square error (RMSE) of
downscaled soil moisture products
(Das et al.,
2019). Accordingly, the mean contents of sand, silt, and clay were extracted
for the first soil layer (0–5 cm) from the SoilGrids database and
resampled to 1 km.</p>
</sec>
<sec id="Ch1.S2.SS4">
  <label>2.4</label><title>Ground-based soil moisture training dataset</title>
      <p id="d1e1490">The ISMN aims to establish and maintain a global database of in situ soil
moisture measurements for the validation and improvement of satellite-based
and modeled soil moisture products. Currently, it consists of 73 networks
with over 2800 soil moisture stations worldwide, providing
quality-controlled and harmonized datasets collected from monitoring
networks and field experiments  (Dorigo et al., 2021). Here,
data for the period from 2000 to 2018 were obtained (<uri>https://ismn.earth/</uri>, last access: 12 May 2023), and only stations with a sensing depth of <inline-formula><mml:math id="M41" display="inline"><mml:mo>&lt;</mml:mo></mml:math></inline-formula> 5 cm were selected to match the observation depth of remotely sensed
datasets. Soil moisture records were then screened according to the quality
flags provided with the ISMN dataset
(Dorigo et al., 2013), before being used
as the training target for the machine learning model. The spatial
distribution of the representative ISMN soil moisture stations selected
using the TC method described in Sect. 3.2 is displayed in Fig. 1. The
number and percentage of representative stations for each land cover type
and climate class, which are calculated by using the 500 m MODIS land cover
type product  (Friedl and Sulla-Menashe, 2019) and the 1 km
Köppen–Geiger climate classification dataset  (Cui et
al., 2021), respectively, are also shown in Table 3.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F1" specific-use="star"><?xmltex \currentcnt{1}?><?xmltex \def\figurename{Figure}?><label>Figure 1</label><caption><p id="d1e1505">Spatial distribution of the 715 representative ISMN soil moisture
stations used for training the model and four independent soil moisture
networks used for validation, with the MODIS land cover type product
(MCD12Q1) for 2016 displayed in the background.</p></caption>
          <?xmltex \igopts{width=384.112205pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f01.jpg"/>

        </fig>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T3" specific-use="star"><?xmltex \currentcnt{3}?><label>Table 3</label><caption><p id="d1e1517">The number and percentage of representative ISMN soil moisture
stations for each climate class and land cover type.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="6">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="right" colsep="1"/>
     <oasis:colspec colnum="4" colname="col4" align="left"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Climate class</oasis:entry>
         <oasis:entry colname="col2">No.</oasis:entry>
         <oasis:entry colname="col3">%</oasis:entry>
         <oasis:entry colname="col4">Land cover type</oasis:entry>
         <oasis:entry colname="col5">No.</oasis:entry>
         <oasis:entry colname="col6">%</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Tropical</oasis:entry>
         <oasis:entry colname="col2">8</oasis:entry>
         <oasis:entry colname="col3">1.1</oasis:entry>
         <oasis:entry colname="col4">Forests</oasis:entry>
         <oasis:entry colname="col5">35</oasis:entry>
         <oasis:entry colname="col6">4.9</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Arid</oasis:entry>
         <oasis:entry colname="col2">135</oasis:entry>
         <oasis:entry colname="col3">18.9</oasis:entry>
         <oasis:entry colname="col4">Shrublands</oasis:entry>
         <oasis:entry colname="col5">16</oasis:entry>
         <oasis:entry colname="col6">2.2</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Temperate, dry summer</oasis:entry>
         <oasis:entry colname="col2">125</oasis:entry>
         <oasis:entry colname="col3">17.5</oasis:entry>
         <oasis:entry colname="col4">Savannas</oasis:entry>
         <oasis:entry colname="col5">185</oasis:entry>
         <oasis:entry colname="col6">25.9</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Temperate, dry winter</oasis:entry>
         <oasis:entry colname="col2">2</oasis:entry>
         <oasis:entry colname="col3">0.3</oasis:entry>
         <oasis:entry colname="col4">Grasslands</oasis:entry>
         <oasis:entry colname="col5">327</oasis:entry>
         <oasis:entry colname="col6">45.7</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Temperate, no dry season</oasis:entry>
         <oasis:entry colname="col2">194</oasis:entry>
         <oasis:entry colname="col3">27.1</oasis:entry>
         <oasis:entry colname="col4">Urban</oasis:entry>
         <oasis:entry colname="col5">12</oasis:entry>
         <oasis:entry colname="col6">1.7</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Cold, dry summer</oasis:entry>
         <oasis:entry colname="col2">36</oasis:entry>
         <oasis:entry colname="col3">5.0</oasis:entry>
         <oasis:entry colname="col4">Croplands</oasis:entry>
         <oasis:entry colname="col5">130</oasis:entry>
         <oasis:entry colname="col6">18.2</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Cold, dry winter</oasis:entry>
         <oasis:entry colname="col2">6</oasis:entry>
         <oasis:entry colname="col3">0.8</oasis:entry>
         <oasis:entry colname="col4">Barren</oasis:entry>
         <oasis:entry colname="col5">10</oasis:entry>
         <oasis:entry colname="col6">1.4</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Cold, no dry season</oasis:entry>
         <oasis:entry colname="col2">176</oasis:entry>
         <oasis:entry colname="col3">24.6</oasis:entry>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6"/>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Polar</oasis:entry>
         <oasis:entry colname="col2">33</oasis:entry>
         <oasis:entry colname="col3">4.6</oasis:entry>
         <oasis:entry colname="col4"/>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6"/>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><?xmltex \gdef\@currentlabel{3}?></table-wrap>

</sec>
<sec id="Ch1.S2.SS5">
  <label>2.5</label><title>Independent in situ validation datasets</title>
      <p id="d1e1767">Four soil moisture monitoring networks that were not included in the ISMN
database were used to assess the model's ability to capture temporal
variations in soil moisture over unknown areas (Fig. 1). The YA and YB
subnetworks are both part of the Yanco soil moisture network, located in a
semiarid agricultural region of the Murrumbidgee River basin, Australia,
with a flat topography and elevation spanning 117–150 m
(Yee
et al., 2017). There are 13 and 11 stations in the YA and YB subnetworks,
respectively, distributed across two 9 <inline-formula><mml:math id="M42" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 9 km areas, and soil
moisture observations from these stations can be downloaded from the Oznet
Hydrological Monitoring website (<uri>http://www.oznet.org.au</uri>, last access: 12 May 2023)
(Smith et al., 2012). Two other micronets (Fort
Cobb and Little Washita) are located in southwestern Oklahoma, USA, and are
characterized by a humid subtropical climate
(Starks et al., 2014). The primary land
cover types are cropland and rangeland, and the topography is moderately
rolling     (Bindlish et al., 2009).
Currently, there are 15 and 20 operational stations in the Fort Cobb and
Little Washita networks, respectively, for which soil moisture datasets can
be accessed through the Grazinglands Research Laboratory (<uri>https://ars.mesonet.org/</uri>, last access: 12 May 2023). These four dense soil moisture networks have
been used extensively to either validate or calibrate satellite soil
moisture products
(Ma
et al., 2021; Colliander et al., 2017; Chan et al., 2018).</p>
</sec>
<sec id="Ch1.S2.SS6">
  <label>2.6</label><title>Microwave soil moisture product</title>
      <p id="d1e1791">To further validate the spatiotemporal performance of the derived 1 km soil
moisture product here, two additional microwave-based products were selected
for comparison. The first product is the high-resolution SMAP/Sentinel-1
SPL2SMAP_S dataset, which contains the first global 1 km<?pagebreak page2060?> soil
moisture product that was publicly released in the past (Table 1). It has a
temporal resolution of 6–12 d and can be downloaded from the National
Snow and Ice Data Center at 1  and 3 km resolutions
(<uri>https://nsidc.org/data/spl2smap_s</uri>, last access: 12 May 2023). According to Das et al. (2019), the average
unbiased RMSE (ubRMSE) values achieved by both the 1 and 3 km
SPL2SMAP_S products over sparse soil moisture networks were
approximately 0.05 m<inline-formula><mml:math id="M43" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M44" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. Considering that the
SPL2SMAP_S baseline algorithm generally shows higher
validation accuracy than the optional algorithm (directly disaggregating the
SMAP 9 km soil moisture product) and that the AM (descending orbit combination)
soil moisture retrievals are more accurate than their APM (a.m. or p.m.) equivalents
(descending or ascending orbits combination)
(Xu, 2020), the
baseline AM soil moisture field “disagg_soil_moisture_1km” data were extracted from the
SPL2SMAP_S 1 km data group and used for comparison. The
second product is the CCI global soil moisture dataset released by the ESA,
with a grid spacing of 0.25<inline-formula><mml:math id="M45" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> and daily temporal resolution, which
combines various passive and active microwave soil moisture products into a
harmonized record with improved spatiotemporal coverages and has been fully
validated across numerous global applications
(Gruber et al.,
2019; Dorigo et al., 2017). Specifically, the combined (active and passive)
soil moisture product from CCI V6.1 was used here (<uri>https://esa-soilmoisture-cci.org/data</uri>, last access: 12 May 2023).</p>
</sec>
</sec>
<?pagebreak page2061?><sec id="Ch1.S3">
  <label>3</label><title>Methods</title>
<sec id="Ch1.S3.SS1">
  <label>3.1</label><title>Overall framework</title>
      <p id="d1e1846">Soil moisture is characterized by high spatiotemporal variability, and its
distribution is influenced by a range of environmental factors across
different scales, such as climate, geographical conditions, soil properties,
and surface coverage
(Crow et al., 2012;
Luo et al., 2022). Here, high-accuracy, spatiotemporally continuous GLASS
products, including LST, albedo, and LAI, were used to provide surface
temperature, spectral information on soil and vegetation, and
information related to vegetation type and density. Considering the impact
of topography and soil properties on soil moisture, topographic and soil
texture fraction variables were extracted from the MERIT DEM and SoilGrids
products, respectively. Additionally, the 0.1<inline-formula><mml:math id="M46" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> ERA5-Land
reanalysis soil moisture product was used to provide background soil
moisture information. By utilizing an ensemble machine learning model,
various variables extracted from these multisource datasets were integrated
so that different environmental factors affecting soil moisture could be
accounted for, and then soil moisture at fine scales could be estimated.</p>
      <p id="d1e1858">Figure 2 shows a flowchart of the proposed 1 km, spatiotemporally continuous
soil moisture estimation framework. Prior to the training phase, the TC
method and the other two long-term soil moisture datasets (ERA5-Land
reanalysis and ESA CCI soil moisture products) were adopted for selecting
the representative soil moisture stations, considering the scale difference
between point-scale soil moisture measurements collected by ISMN stations
and GLASS products (the detailed selection procedure is presented in Sect. 3.2). Then, multiple variables were extracted from the corresponding input
datasets and spatiotemporally collocated with the in situ soil moisture
measurements from the representative stations between 2000 and 2018.
Specifically, the black-sky visible, near-infrared, shortwave albedo, LAI,
and LST were extracted from the three GLASS products, based on the
geographic locations of stations. Each of these variables, together with
topographic and soil texture fraction variables, and the coarse-scale
reanalysis soil moisture were put into the XGBoost model, which was chosen
to simulate the nonlinear relationship between multiple input features and
in situ soil moisture (the target variable). Lastly, those multisource
input datasets were resampled to 1 km and then put into the developed
XGBoost model for predicting the global 1 km spatiotemporally continuous
soil moisture product (GLASS SM). Moreover, the GLASS SM product was
evaluated against four independent soil moisture datasets and then compared
with the SPL2SMAP_S and CCI soil moisture products for
spatiotemporal consistency analyses.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F2" specific-use="star"><?xmltex \currentcnt{2}?><?xmltex \def\figurename{Figure}?><label>Figure 2</label><caption><p id="d1e1863">Flowchart of the proposed 1 km spatiotemporally continuous soil
moisture estimation framework.</p></caption>
          <?xmltex \igopts{width=355.659449pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f02.png"/>

        </fig>

<?xmltex \hack{\newpage}?>
</sec>
<sec id="Ch1.S3.SS2">
  <label>3.2</label><title>Triple-collocation-based station selection</title>
      <p id="d1e1882">As mentioned above, in situ soil moisture data from the ISMN stations were
employed as the target variable to train the XGBoost model, which was then
used to predict the soil moisture product at 1 km resolution. The underlying
assumption was that the measured soil moisture at these point-scale stations
is representative of the average moisture status of the corresponding 1 km
pixel; however, because of the high spatiotemporal variability of soil
moisture, this assumption is not always upheld. Accordingly, the TC method,
which has been widely applied to analyze the coarse-scale spatial
representativeness of in situ soil moisture datasets
(Gruber
et al., 2013; Molero et al., 2018), was adopted here to select the most
representative stations. Specifically, TC is an error analysis method
proposed by Stoffelen (1998) employing three
collocated datasets to address large uncertainties in wind speed
measurements. TC has also been widely used in the evaluation of satellite
soil moisture products given the limited number of core validation sites at
the satellite footprint scale
(Zheng
et al., 2022). The commonly used error model for TC analysis is defined in
Eq. (1):
            <disp-formula id="Ch1.E1" content-type="numbered"><label>1</label><mml:math id="M47" display="block"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="italic">α</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msub><mml:mi mathvariant="italic">β</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mo>+</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msub><mml:mi mathvariant="italic">ε</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
          where <inline-formula><mml:math id="M48" display="inline"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> refers to the three collocated soil moisture observations;
<inline-formula><mml:math id="M49" display="inline"><mml:mi mathvariant="italic">θ</mml:mi></mml:math></inline-formula> refers to the unknown true value of soil moisture; <inline-formula><mml:math id="M50" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">α</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>
and <inline-formula><mml:math id="M51" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">β</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> are the additive and multiplicative biases of <inline-formula><mml:math id="M52" display="inline"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>
relative to the true value, respectively; and <inline-formula><mml:math id="M53" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ε</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the
random additive noise with zero mean. The assumptions underlying this error
model and detailed derivation process for the error variance of each dataset
can be found in Gruber et al. (2016).
Notably, the assumptions made for TC analysis are similar to those made for
the correlation coefficient (<inline-formula><mml:math id="M54" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula>) and RMSE
(Gruber et al., 2016). To fulfill the
independent error requirement of the TC analysis across the three datasets,
the ISMN in situ soil moisture, model-based ERA5-Land soil moisture, and CCI
combined microwave soil moisture were selected to construct the triplet.
Among them, the CCI soil moisture product was selected here rather than
other microwave soil moisture products, as it spans a sufficiently long
period to cover that of the training samples. The error variance of the
ISMN soil moisture dataset, <inline-formula><mml:math id="M55" display="inline"><mml:mrow><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="italic">ε</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>, was then
calculated according to Eq. (2):
            <disp-formula id="Ch1.E2" content-type="numbered"><label>2</label><mml:math id="M56" display="block"><mml:mrow><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="italic">ε</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup><mml:mo>=</mml:mo><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup><mml:mo>-</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi mathvariant="normal">Cov</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">era</mml:mi></mml:msub><mml:mo>)</mml:mo><mml:mi mathvariant="normal">Cov</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">cci</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mi mathvariant="normal">Cov</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">era</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">cci</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
          where <inline-formula><mml:math id="M57" display="inline"><mml:mrow><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula> is the variance of the ISMN in situ soil
moisture; Cov is the covariance operator; and <inline-formula><mml:math id="M58" display="inline"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M59" display="inline"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">era</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and
<inline-formula><mml:math id="M60" display="inline"><mml:mrow><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">cci</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> denote the collocated ISMN, ERA5-Land, and CCI soil moisture
observations, respectively. Based on TC analysis, McColl et al. (2014) proposed a method called extended triple
collocation (ETC) to estimate the correlation coefficient between each
dataset and the unknown target variable. Specifically, the ETC correlation
coefficient of the ISMN soil moisture dataset, <inline-formula><mml:math id="M61" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">ETC</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, can be calculated
via Eq. (3):
            <disp-formula id="Ch1.E3" content-type="numbered"><label>3</label><mml:math id="M62" display="block"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">ETC</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:mi mathvariant="normal">sign</mml:mi><mml:mo>(</mml:mo><mml:mo>±</mml:mo><mml:mo>)</mml:mo><mml:msqrt><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi mathvariant="normal">Cov</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">era</mml:mi></mml:msub><mml:mo>)</mml:mo><mml:mi mathvariant="normal">Cov</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">cci</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">ismn</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup><mml:mi mathvariant="normal">Cov</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">era</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msub><mml:mi>X</mml:mi><mml:mi mathvariant="normal">cci</mml:mi></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:mfrac></mml:mstyle></mml:msqrt><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>
          where the sign of <inline-formula><mml:math id="M63" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">ETC</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> was corrected to positive. It is a<?pagebreak page2062?> scaled,
unbiased signal-to-noise-ratio metric complementary to <inline-formula><mml:math id="M64" display="inline"><mml:mrow><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="italic">ε</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula>. Using the above TC-based metrics and referring to previous studies
(Yuan
et al., 2020; Anderson et al., 2012), several strict conditions were
established to select the most representative ISMN stations: (1) <inline-formula><mml:math id="M65" display="inline"><mml:mo>&gt;</mml:mo></mml:math></inline-formula> 500 triplets were available at the station during the period
2000–2018, (2) the <inline-formula><mml:math id="M66" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> between any two soil moisture datasets in the triplets
was <inline-formula><mml:math id="M67" display="inline"><mml:mo>&gt;</mml:mo></mml:math></inline-formula> 0.2, (3) the square root of the <inline-formula><mml:math id="M68" display="inline"><mml:mrow><mml:msubsup><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="italic">ε</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msubsup></mml:mrow></mml:math></inline-formula> calculated for the ISMN soil moisture dataset was <inline-formula><mml:math id="M69" display="inline"><mml:mo>&lt;</mml:mo></mml:math></inline-formula> 0.06 m<inline-formula><mml:math id="M70" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M71" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, and (4) the <inline-formula><mml:math id="M72" display="inline"><mml:mrow><mml:msub><mml:mi>R</mml:mi><mml:mi mathvariant="normal">ETC</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> between the ISMN soil moisture and
the unknown soil moisture true values was <inline-formula><mml:math id="M73" display="inline"><mml:mo>&gt;</mml:mo></mml:math></inline-formula> 0.7. A total of 715
representative ISMN soil moisture stations were finally selected, as shown
in Fig. 1.</p>
</sec>
<sec id="Ch1.S3.SS3">
  <label>3.3</label><title>XGBoost model</title>
      <p id="d1e2361">Ensemble machine learning models can be roughly classified into two
categories based on how the individual learners are generated: bagging and
boosting  (Zhou, 2021). For bagging models, the individual
learners are constructed independently; whereas for boosting models,
learners are constructed iteratively, increasing the weights for the
incorrectly classified samples during each round of training. As a
representative bagging algorithm, random forest has gained considerable
attention in the fields of remote sensing classification and regression over
recent decades (Belgiu and Drăguţ, 2016);
however, it may suffer from a large prediction bias, especially when the
observations are too large or small (Song, 2015). In
contrast, boosting models have been shown to reduce both variance and bias
and are robust to multicollinearity among predictors
(Gislason
et al., 2006; Karthikeyan and Mishra, 2021). Accordingly, the present study
employed the XGBoost model implemented by Chen and Guestrin (2016) based on a gradient boosting framework
(Friedman, 2001). The XGBoost model is advantageous for its
scalability, efficiency, and decreased vulnerability to overfitting. Here,
the open-source <italic>xgboost</italic> and <italic>Scikit-learn</italic> Python packages were used together for model training
and hyperparameter tuning, with the grid search method being adopted to
determine the optimal parameters. The key hyperparameters of the
XGBoost models were finally set to n_estimators (the number
of the boosting rounds) <inline-formula><mml:math id="M74" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 1000, learning_rate <inline-formula><mml:math id="M75" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 0.1, and
max_depth (maximum tree depth) <inline-formula><mml:math id="M76" display="inline"><mml:mo>=</mml:mo></mml:math></inline-formula> 8.</p>
</sec>
<sec id="Ch1.S3.SS4">
  <label>3.4</label><title>Evaluation strategies and performance metrics</title>
      <p id="d1e2399">While most previous soil moisture estimation studies based on machine
learning have only used the random validation approach, this study used
three complementary validation strategies to fully evaluate the model
performance: random, site independent, and year independent. For the random
validation, samples from all soil moisture stations during 2000–2018 were
randomly divided into five folds, among which three folds were used for
training, one as the validation dataset to tune the hyperparameters of the
model, and one as the test dataset to evaluate the model performance. Thus,
the samples in the random test dataset may have been from the same station
or year as the training or validation datasets. For site-independent
validation, all soil moisture stations were again randomly divided into five
folds, and samples from one fold were used as the test dataset to evaluate
the accuracy of models trained with samples from the other folds,<?pagebreak page2063?> which were
used for training and validation. Thus, the location of the samples in the
site-independent test dataset is unknown to the model. Similarly, for the
year-independent validation, samples from all stations between 2015 and 2018
were selected as the test dataset to evaluate the accuracy of the model
trained using samples between 2000 and 2014, to ensure that the observation
year was unknown to the model.</p>
      <p id="d1e2402">In addition to model evaluation, the accuracy of the GLASS SM product
generated by the developed model was evaluated. This 1 km soil moisture
product was first validated against four independent dense soil moisture
networks and then compared with the 1 km SPL2SMAP_S and
0.25<inline-formula><mml:math id="M77" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> CCI soil moisture products for spatiotemporal consistency
analyses. Four widely used performance metrics in soil-moisture-related
research – the <inline-formula><mml:math id="M78" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula>, bias, RMSE, and ubRMSE  (Entekhabi et al.,
2010) – are used to evaluate both the models and products against in situ
datasets; these metrics are calculated according to Eqs. (4)–(7):

                <disp-formula specific-use="gather" content-type="numbered"><mml:math id="M79" display="block"><mml:mtable displaystyle="true"><mml:mlabeledtr id="Ch1.E4"><mml:mtd><mml:mtext>4</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi>E</mml:mi><mml:mo>[</mml:mo><mml:mo>(</mml:mo><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:mi>E</mml:mi><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>)</mml:mo><mml:mo>(</mml:mo><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:mi>E</mml:mi><mml:mfenced open="[" close="]"><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>)</mml:mo><mml:mo>]</mml:mo></mml:mrow><mml:mrow><mml:msub><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E5"><mml:mtd><mml:mtext>5</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mi mathvariant="normal">bias</mml:mi><mml:mo>=</mml:mo><mml:mi>E</mml:mi><mml:mfenced open="[" close="]"><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>-</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:mi>E</mml:mi><mml:mfenced close="]" 
open="["><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E6"><mml:mtd><mml:mtext>6</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mi mathvariant="normal">RMSE</mml:mi><mml:mo>=</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:msqrt><mml:mrow><mml:mi>E</mml:mi><mml:mo>[</mml:mo><mml:mo>(</mml:mo><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub><mml:msup><mml:mo>)</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:mo>]</mml:mo></mml:mrow></mml:msqrt><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr><mml:mlabeledtr id="Ch1.E7"><mml:mtd><mml:mtext>7</mml:mtext></mml:mtd><mml:mtd><mml:mrow><mml:mstyle class="stylechange" displaystyle="true"/><mml:mi mathvariant="normal">ubRMSE</mml:mi><mml:mo>=</mml:mo><mml:mspace width="0.125em" linebreak="nobreak"/><mml:msqrt><mml:mrow><mml:mi>E</mml:mi><mml:mo mathvariant="italic">{</mml:mo><mml:mo>[</mml:mo><mml:mfenced open="(" close=")"><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:mi>E</mml:mi><mml:mfenced open="[" close="]"><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub></mml:mrow></mml:mfenced></mml:mrow></mml:mfenced><mml:mo>-</mml:mo><mml:mspace linebreak="nobreak" width="0.125em"/><mml:mo>(</mml:mo><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub><mml:mo>-</mml:mo><mml:mi>E</mml:mi><mml:mfenced close="]" open="["><mml:mrow><mml:msub><mml:mi 
mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub></mml:mrow></mml:mfenced><mml:mo>)</mml:mo><mml:msup><mml:mo>]</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:msup><mml:mo mathvariant="italic">}</mml:mo></mml:mrow></mml:msqrt><mml:mo>,</mml:mo></mml:mrow></mml:mtd></mml:mlabeledtr></mml:mtable></mml:math></disp-formula>

            where <inline-formula><mml:math id="M80" display="inline"><mml:mrow><mml:mi>E</mml:mi><mml:mo>[</mml:mo><mml:mo>.</mml:mo><mml:mo>]</mml:mo></mml:mrow></mml:math></inline-formula> denotes the mean operator; <inline-formula><mml:math id="M81" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M82" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">θ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>
represent the in situ soil moisture and corresponding estimated soil
moisture; and <inline-formula><mml:math id="M83" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">true</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M84" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">σ</mml:mi><mml:mi mathvariant="normal">est</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> refer to the standard
deviation of the in situ and estimated soil moisture values, respectively.
Note that, while comparing two soil moisture products with similar spatial
resolution in Sect. 4.4, the term “root mean square difference (RMSD)” is
used, despite the fact that it is also calculated using Eq. (6). In addition, when a
large-scale soil moisture product is validated against a point-scale in situ
soil moisture dataset, bias often exists between the two datasets because of
scale differences; in such cases, <inline-formula><mml:math id="M85" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and ubRMSE are typically more informative than
RMSE.</p>
</sec>
</sec>
<sec id="Ch1.S4">
  <label>4</label><title>Results</title>
      <p id="d1e2724">In Sect. 4.1, the overall performance of the XGBoost models trained using
different groups of stations was first evaluated using random test samples.
Then, the performance of the models was evaluated on the site- or
year-independent test samples in Sect. 4.2, where the permutation feature
importance results of the models and the importance of each type of input
variables were examined, followed by an analysis of the model performance
metrics at each station and over each land cover type. Section 4.3 shows the
time-series validation results of the GLASS SM product generated using the
developed model on four independent soil moisture networks, whereas Sect. 4.4 compares the global 1 km GLASS SM product with two global microwave soil
moisture products for spatiotemporal consistency analyses.</p>
<sec id="Ch1.S4.SS1">
  <label>4.1</label><title>Model performance on the random test samples</title>
      <p id="d1e2734">Figure 3 shows the overall performance of the XGBoost models developed using
all input variables on the random test samples. To analyze the effect of
screening soil moisture stations, the accuracies of models developed using
all ISMN stations, the representative stations selected using the TC method,
and the stations excluded using the TC method (not included in the
representative stations) were compared via scatterplots. In general, the
random validation accuracy of all three XGBoost models was high, with the
bias between the model-predicted and target soil moisture values being close
to zero. The accuracy of the models developed using all ISMN stations or the
TC-excluded stations was similar for the test samples, with <inline-formula><mml:math id="M86" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values of
0.917 and 0.918 and RMSE values of 0.047  and 0.049 m<inline-formula><mml:math id="M87" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M88" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. In contrast, the accuracy of the model developed
with the representative stations selected using the TC method was
significantly improved for the test samples, with <inline-formula><mml:math id="M89" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE values of 0.941
and 0.038 m<inline-formula><mml:math id="M90" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M91" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. Compared with the other two
models, the soil moisture estimates of the XGBoost model developed using
representative stations were more concentrated along the <inline-formula><mml:math id="M92" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>:</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:math></inline-formula> line. Notably,
most of the soil moisture measurements that were nearly saturated
(<inline-formula><mml:math id="M93" display="inline"><mml:mo lspace="0mm">&gt;</mml:mo></mml:math></inline-formula> 0.5 m<inline-formula><mml:math id="M94" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M95" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>) were excluded after the station
screening process (Fig. 3), likely because those high soil moisture samples
at point scales were typically unrepresentative of the mean soil
moisture conditions at satellite footprint scales. Meanwhile, the validation
accuracy of the ERA5-Land surface soil moisture product was also calculated
for all soil moisture samples, as well as those selected by the TC method
for comparison. After station screening, the overall <inline-formula><mml:math id="M96" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> between ERA5-Land
reanalysis and in situ soil moisture increased from 0.56 to 0.64, while the
RMSE decreased slightly from 0.138 to 0.129 m<inline-formula><mml:math id="M97" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M98" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, and the bias
remained unchanged at 0.08 m<inline-formula><mml:math id="M99" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M100" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. The above performance metrics
indicated that representative stations can be effectively selected by using
the TC method, and training the XGBoost model with representative stations
can significantly improve its validation accuracy on the random test
samples.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F3" specific-use="star"><?xmltex \currentcnt{3}?><?xmltex \def\figurename{Figure}?><label>Figure 3</label><caption><p id="d1e2886">Scatterplots of measured and predicted soil moisture from the
XGBoost models developed using <bold>(a)</bold> all ISMN stations, <bold>(b)</bold> representative
stations selected using the TC method, and <bold>(c)</bold> stations excluded using the
TC method. Point colors indicate the probability density. Red dotted line
displays the linear regression, and the black solid line is the <inline-formula><mml:math id="M101" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>:</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:math></inline-formula> line.</p></caption>
          <?xmltex \igopts{width=441.017717pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f03.jpg"/>

        </fig>

</sec>
<sec id="Ch1.S4.SS2">
  <label>4.2</label><title>Model performance on site- or year-independent samples</title>
      <?pagebreak page2064?><p id="d1e2924">As can be seen from Table 4, regardless of the type of soil moisture station
used during training, model performance on the year-independent test samples
(2015 to 2018) decreased significantly compared to that on the random test
samples. Among them, the <inline-formula><mml:math id="M102" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values of the models trained using all stations
and TC-excluded stations were 0.8 and 0.734 for the year-independent test
samples, respectively, while the corresponding RMSE increased to 0.07 and
0.084 m<inline-formula><mml:math id="M103" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M104" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. In contrast, the XGBoost model trained
using representative stations selected by the TC method achieved the highest
accuracy on the year-independent test samples, with <inline-formula><mml:math id="M105" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE values of
0.873 and 0.054 m<inline-formula><mml:math id="M106" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M107" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. Likewise, the performance of
the models trained using three different types of stations on the
site-independent test samples (randomly selected one-fifth of the total
stations) further decreased compared to that of the year-independent test
samples. The RMSE values of the models trained using all and excluded stations
further increased to 0.093 and 0.106 m<inline-formula><mml:math id="M108" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M109" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively, for the
site-independent test samples. By comparison, the XGBoost model trained
using representative stations achieved the highest accuracy for the
site-independent test samples, with <inline-formula><mml:math id="M110" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE values of 0.715 and 0.079 m<inline-formula><mml:math id="M111" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M112" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. These results suggest that the good
performance of the models on the random or year-independent test samples is
likely a result of model overfitting, and their accuracies may degrade
significantly when the stations or observation years of the test samples are
unknown to them. The relatively lower accuracy achieved by the model
on site-independent test samples is least likely to be inflated by overfitting and can be
regarded as the model's true accuracy. Besides, it appears that increasing
the number of stations in the training dataset to account for spatial
heterogeneity is more important for improving the models' performance than
extending the duration of the measurements to account for temporal dynamics,
as also found in a previous study (Zappa
et al., 2019). Moreover, training the model with representative stations
selected by the TC method can also considerably improve its performance on
site- or year-independent test samples, i.e., model performance over
unknown time and space.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T4" specific-use="star"><?xmltex \currentcnt{4}?><label>Table 4</label><caption><p id="d1e3036">Validation accuracy of the XGBoost models trained using three
different types of soil moisture stations on three types of test samples.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="10">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right" colsep="1"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:colspec colnum="7" colname="col7" align="right" colsep="1"/>
     <oasis:colspec colnum="8" colname="col8" align="right"/>
     <oasis:colspec colnum="9" colname="col9" align="right"/>
     <oasis:colspec colnum="10" colname="col10" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1">Validation strategies</oasis:entry>
         <oasis:entry rowsep="1" namest="col2" nameend="col4" align="center" colsep="1">All stations </oasis:entry>
         <oasis:entry rowsep="1" namest="col5" nameend="col7" align="center" colsep="1">Representative stations </oasis:entry>
         <oasis:entry rowsep="1" namest="col8" nameend="col10" align="center">Excluded stations </oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M113" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col3">RMSE</oasis:entry>
         <oasis:entry colname="col4">ubRMSE</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M114" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">RMSE</oasis:entry>
         <oasis:entry colname="col7">ubRMSE</oasis:entry>
         <oasis:entry colname="col8"><inline-formula><mml:math id="M115" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col9">RMSE</oasis:entry>
         <oasis:entry colname="col10">ubRMSE</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">(m<inline-formula><mml:math id="M116" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M117" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col4">(m<inline-formula><mml:math id="M118" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M119" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col5"/>
         <oasis:entry colname="col6">(m<inline-formula><mml:math id="M120" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M121" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col7">(m<inline-formula><mml:math id="M122" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M123" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col8"/>
         <oasis:entry colname="col9">(m<inline-formula><mml:math id="M124" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M125" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col10">(m<inline-formula><mml:math id="M126" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M127" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">Random</oasis:entry>
         <oasis:entry colname="col2">0.917</oasis:entry>
         <oasis:entry colname="col3">0.047</oasis:entry>
         <oasis:entry colname="col4">0.047</oasis:entry>
         <oasis:entry colname="col5">0.941</oasis:entry>
         <oasis:entry colname="col6">0.038</oasis:entry>
         <oasis:entry colname="col7">0.038</oasis:entry>
         <oasis:entry colname="col8">0.918</oasis:entry>
         <oasis:entry colname="col9">0.049</oasis:entry>
         <oasis:entry colname="col10">0.049</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Year independent</oasis:entry>
         <oasis:entry colname="col2">0.800</oasis:entry>
         <oasis:entry colname="col3">0.070</oasis:entry>
         <oasis:entry colname="col4">0.070</oasis:entry>
         <oasis:entry colname="col5">0.873</oasis:entry>
         <oasis:entry colname="col6">0.054</oasis:entry>
         <oasis:entry colname="col7">0.054</oasis:entry>
         <oasis:entry colname="col8">0.734</oasis:entry>
         <oasis:entry colname="col9">0.084</oasis:entry>
         <oasis:entry colname="col10">0.084</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Site independent</oasis:entry>
         <oasis:entry colname="col2">0.630</oasis:entry>
         <oasis:entry colname="col3">0.093</oasis:entry>
         <oasis:entry colname="col4">0.093</oasis:entry>
         <oasis:entry colname="col5">0.715</oasis:entry>
         <oasis:entry colname="col6">0.079</oasis:entry>
         <oasis:entry colname="col7">0.079</oasis:entry>
         <oasis:entry colname="col8">0.564</oasis:entry>
         <oasis:entry colname="col9">0.106</oasis:entry>
         <oasis:entry colname="col10">0.106</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><?xmltex \gdef\@currentlabel{4}?></table-wrap>

      <p id="d1e3393">Figure 4 shows the permutation feature importance results of the XGBoost
models trained using representative soil moisture stations, which were
calculated separately for the three different types of test samples. The
permutation importance of an input feature is commonly measured by the
degradation of model accuracy when the feature is randomly shuffled
(Breiman, 2001); it can be calculated multiple times across a test
dataset and is less likely to be biased towards high-cardinality features.
Notably, permutation importance does not reflect a feature's intrinsic
predictive value but rather its relative importance to a particular model.
For all three types of test samples, ERA5-Land surface soil moisture
(SM_era) achieved the highest importance score, indicating
that this coarse-scale reanalysis soil moisture product can indeed provide
reliable soil moisture background information for the 1 km soil moisture
estimation model. Specifically, for both the random and year-independent
test samples (Fig. 4a, b), the importance of elevation and soil texture
variables (sand, silt, and clay) ranked relatively high, showing that soil
properties and topographic factors are important for accurate model
predictions when the sample locations are known. In addition, the three
GLASS black-sky albedo bands (ABD_vis, ABD_nir, and ABD_short) also achieved relatively high importance
scores for both types of samples, likely because surface albedo can reflect
the surface energy flux and land cover conditions, which are further
correlated to the spatial variation in soil moisture
(Long et al.,
2019). Meanwhile, the importance scores of GLASS LAI and LST were relatively
low for the two sample types, which may be partly attributed to their
correlation with some high-ranking variables (e.g., ABD_vis,
SM_era). For example, after removing ERA5-Land soil moisture
from the models, the importance scores of both GLASS LST and LAI increased
significantly. In contrast, for the site-independent test samples (Fig. 4c), the importance of ERA5-Land surface soil moisture (SM_era) further increased relative to other variables. In addition, the
importance ranking of GLASS albedo and LST increased remarkably, whereas
that of terrain and soil texture-related variables dropped dramatically,
suggesting that when the location of the test samples is unknown to the
model, variables such as coarse-scale soil moisture, albedo, and LST appear
to be more important for accurately predicting soil moisture. Note that the
final model was developed using all the representative ISMN stations, and
its<?pagebreak page2065?> feature importance over unknown regions can be inferred from the results
calculated on the site-independent test samples.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F4" specific-use="star"><?xmltex \currentcnt{4}?><?xmltex \def\figurename{Figure}?><label>Figure 4</label><caption><p id="d1e3399">Permutation feature importance results of the XGBoost models
trained using the representative stations, and calculated using the <bold>(a)</bold> random, <bold>(b)</bold> year-independent, and <bold>(c)</bold> site-independent test samples.
Features from different input datasets are divided into four groups with
different colors.
</p></caption>
          <?xmltex \igopts{width=441.017717pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f04.png"/>

        </fig>

      <p id="d1e3417">To further investigate the importance of different types of input variables
for the 1 km soil moisture estimation model over unknown space, the
validation accuracy of the XGBoost models developed using different
combinations of input datasets on the site-independent test samples was
also compared. The XGBoost model trained with all input datasets achieved
the highest accuracy (Table 5), with <inline-formula><mml:math id="M128" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE values of 0.715 and 0.079 m<inline-formula><mml:math id="M129" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M130" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. After the ERA5-Land soil moisture product
was excluded, the model accuracy for the test dataset decreased
significantly, with the RMSE value increasing to 0.086 m<inline-formula><mml:math id="M131" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M132" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>,
further reflecting the relatively high importance of the coarse-scale soil
moisture background information for the 1 km estimation model derived here.
Similarly, after excluding GLASS albedo, LAI, and LST from the input
variables, the model trained with the remaining variables showed a marked
decrease in accuracy for the test dataset, with <inline-formula><mml:math id="M133" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE values of 0.694
and 0.083 m<inline-formula><mml:math id="M134" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M135" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. This indicates that the
information on soil and vegetation reflective properties, surface
temperature, and vegetation types and densities provided by GLASS
products is also important for the 1 km soil moisture estimation model.
Further, the exclusion of terrain or soil texture datasets showed a similar
effect on model accuracy, with RMSE values increasing to 0.082 and 0.083 m<inline-formula><mml:math id="M136" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M137" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively, again suggesting the pertinent contribution
of these variables to improving the performance of the soil moisture
estimation model. Besides, as shown in Table 2, the spatial resolution of
most input datasets was within 1 km, except for the ERA5-Land product, which
had a relatively low spatial resolution (0.1<inline-formula><mml:math id="M138" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>). Therefore, the
integration of multisource input datasets using a machine learning model
can improve not only the model accuracy but also the spatial details of the soil
moisture product. Because the XGBoost model trained with all input
datasets performed best on the test dataset, all datasets were included in
model training during the subsequent experiments.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T5" specific-use="star"><?xmltex \currentcnt{5}?><label>Table 5</label><caption><p id="d1e3531">Performance metrics of the XGBoost model developed using different
combinations of input datasets on the site-independent test samples.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="4">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1">Input datasets</oasis:entry>
         <oasis:entry colname="col2"><inline-formula><mml:math id="M139" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col3">RMSE</oasis:entry>
         <oasis:entry colname="col4">ubRMSE</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">(m<inline-formula><mml:math id="M140" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M141" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
         <oasis:entry colname="col4">(m<inline-formula><mml:math id="M142" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M143" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>)</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">All datasets included</oasis:entry>
         <oasis:entry colname="col2">0.715</oasis:entry>
         <oasis:entry colname="col3">0.079</oasis:entry>
         <oasis:entry colname="col4">0.079</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Coarse SM (ERA5-Land) excluded</oasis:entry>
         <oasis:entry colname="col2">0.646</oasis:entry>
         <oasis:entry colname="col3">0.086</oasis:entry>
         <oasis:entry colname="col4">0.086</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Albedo, LAI, and LST (GLASS) excluded</oasis:entry>
         <oasis:entry colname="col2">0.694</oasis:entry>
         <oasis:entry colname="col3">0.083</oasis:entry>
         <oasis:entry colname="col4">0.082</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Terrain (MERIT) excluded</oasis:entry>
         <oasis:entry colname="col2">0.700</oasis:entry>
         <oasis:entry colname="col3">0.082</oasis:entry>
         <oasis:entry colname="col4">0.082</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Soil texture (SoilGrids) excluded</oasis:entry>
         <oasis:entry colname="col2">0.684</oasis:entry>
         <oasis:entry colname="col3">0.083</oasis:entry>
         <oasis:entry colname="col4">0.083</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><?xmltex \gdef\@currentlabel{5}?></table-wrap>

      <p id="d1e3704">To explore the causes of decreased 1 km soil moisture estimation model
accuracies over unknown time and space, performance metrics were calculated
for each station for the models trained using either all ISMN stations or the
representative soil moisture stations selected by the TC method. To obtain
the validation accuracy for each station, a 5-fold cross-validation method
was adopted, where the stations were randomly divided into five folds, with
samples from four folds used to develop the model, and the accuracy metrics
were derived for the remaining fold. This process was repeated five times,
until the accuracies of all stations were evaluated. The distribution of
performance metrics for the XGBoost model developed using all stations was
dispersed across stations, with <inline-formula><mml:math id="M144" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values ranging from <inline-formula><mml:math id="M145" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>1 to 1, and RMSE
values ranging from 0.005 to 0.397 m<inline-formula><mml:math id="M146" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M147" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> (Fig. 5, Table 6).
Although the median of the bias between model-predicted and measured soil
moisture was 0, the model exhibited a large prediction bias for most
stations (from <inline-formula><mml:math id="M148" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.39 to 0.34 m<inline-formula><mml:math id="M149" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M150" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>), partly contributing to the
large RMSE observed at these stations. After removing the prediction bias
for each station, the median ubRMSE of the model decreased to 0.055 m<inline-formula><mml:math id="M151" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M152" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, compared to the median RMSE of 0.075 m<inline-formula><mml:math id="M153" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M154" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. As a
comparison, the performance metrics of the ERA5-Land soil moisture product
at each ISMN station were also calculated and are displayed in Fig. 5. The
coarse-scale soil moisture product showed similar <inline-formula><mml:math id="M155" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values to those of the
XGBoost model developed using all stations, and it also yielded large bias
and dispersed RMSE and ubRMSE values at most stations.</p>
      <p id="d1e3821">After filtering the stations using the TC method, the accuracies of the
ERA5-Land soil moisture product at those representative stations improved
significantly. Similarly, the validation accuracies of the model developed
using the representative stations also improved significantly, with the
distribution of its performance metrics being more concentrated across
stations, compared to the model developed without station filtering. In
particular, the median <inline-formula><mml:math id="M156" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> of the model at each station increased from 0.64 to
0.74, median RMSE decreased from 0.075 to 0.068 m<inline-formula><mml:math id="M157" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M158" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, and ubRMSE
decreased from 0.055 to 0.052 m<inline-formula><mml:math id="M159" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M160" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. Over most of the
representative stations, the XGBoost model obtained similar or even larger <inline-formula><mml:math id="M161" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula>
values compared to the ERA5-Land soil moisture product. However, there were
also several stations where the model achieved relatively lower <inline-formula><mml:math id="M162" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values,
yet this degradation in temporal metrics with respect to the original
coarse-scale products can be found in many soil moisture downscaling studies
(Gruber et al., 2020).</p>
      <?pagebreak page2066?><p id="d1e3889">On the other hand, the model developed using the representative stations
still exhibited a large bias at most stations, ranging from <inline-formula><mml:math id="M163" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.21 to 0.21 m<inline-formula><mml:math id="M164" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M165" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, although the median bias of the model was 0. Therefore,
the decreased overall accuracies of the model over unknown spaces can be
attributed to these large site-specific biases, which may be caused by the
high spatiotemporal variability of surface soil moisture and the scale
differences between the target point-scale soil moisture and 1 km model
predicted soil moisture. Specifically, in random and year-independent
validation strategies, part of the site-specific information is known to the
models, whereas in the site-independent validation method, this information
is entirely unknown to the model. By adopting the TC method, it is possible
to select soil moisture stations that are representative of the average soil
moisture on a larger scale, thereby alleviating the scale difference issue
to some extent. However, there may still be large biases between
measurements from these point-scale representative soil moisture stations
and footprint-scale average soil moisture values. As these biases are
site-specific, can be positive or negative, and have a median value for all
samples near 0, the overall ubRMSE that the model achieved on the site- or
year-independent test samples can still be large when these biases are
unknown to the model. Nevertheless, training the model with representative
soil moisture stations not only improved the model's overall performance
over unknown spatiotemporal locations (Table 4) but also improved the
performance metrics of the model at each station (Fig. 5).</p>

      <?xmltex \floatpos{p}?><fig id="Ch1.F5"><?xmltex \currentcnt{5}?><?xmltex \def\figurename{Figure}?><label>Figure 5</label><caption><p id="d1e3922">Boxplots of the <bold>(a)</bold> <inline-formula><mml:math id="M166" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula>, <bold>(b)</bold> bias, <bold>(c)</bold> RMSE, and <bold>(d)</bold> ubRMSE achieved
by the XGBoost models (blue) developed using all stations and the
representative stations selected by the TC method, respectively, in
comparison with those of the ERA5-Land soil moisture product (orange).</p></caption>
          <?xmltex \igopts{width=241.848425pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f05.png"/>

        </fig>

      <?pagebreak page2067?><p id="d1e3950">In addition to the performance metrics of the two XGBoost models at each
station, Table 6 shows the validation accuracies of the model developed
using the representative stations over different land cover types. Affected
by a series of practical factors, the distribution of ISMN soil moisture
stations is uneven in space, with the majority of the stations located in
the CONUS. After screening stations via the TC method, the spatial
distribution of representative stations remained uneven, with the resulting
number of stations for each land cover type also varying significantly (Fig. 1). Overall, the performance of the model developed using the representative
stations for most land cover types showed an improvement compared with the
model developed using all stations, as indicated by larger median <inline-formula><mml:math id="M167" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values
and smaller median RMSE and ubRMSE values. However, the median ubRMSE of the
model achieved for forests was larger than that for other land cover types,
likely a result of soil moisture remaining at high levels in forested
areas. Additionally, among the seven land cover types, the model achieved
the lowest median <inline-formula><mml:math id="M168" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values for shrublands and barren lands, likely due to
the limited number of stations present across these two types. However, the
model also achieved the lowest median ubRMSE values for these two types,
which can be partly attributed to the fact that despite the low sample
percentages the number of samples for these land cover types was sufficient
for the models to learn, as well as in part due to the relatively simple soil
moisture dynamics of these two types. Although the median bias of the model
for each land cover type was near 0, the model exhibited a large prediction
bias for most stations across each land cover type (Table 6). After removing
the prediction bias at each station, the median ubRMSE of the model for the
seven land cover types ranged from 0.031 to 0.061 m<inline-formula><mml:math id="M169" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M170" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, marking
a dramatic decrease over the corresponding median RMSE. Given that a large
prediction bias existed in each land cover type and that the model
performance did not vary significantly across different types, it was
suggested that the uneven distribution of land cover types across samples
was not the major cause of the decreased overall model accuracy over unknown
spaces.</p>

<?xmltex \floatpos{t}?><table-wrap id="Ch1.T6" specific-use="star"><?xmltex \currentcnt{6}?><label>Table 6</label><caption><p id="d1e3991">Performance metric statistics for the XGBoost models developed using
all stations and representative stations, as well as those achieved by the latter
model over each land cover type.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="14">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right" colsep="1"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:colspec colnum="5" colname="col5" align="right" colsep="1"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:colspec colnum="7" colname="col7" align="right"/>
     <oasis:colspec colnum="8" colname="col8" align="right" colsep="1"/>
     <oasis:colspec colnum="9" colname="col9" align="right"/>
     <oasis:colspec colnum="10" colname="col10" align="right"/>
     <oasis:colspec colnum="11" colname="col11" align="right" colsep="1"/>
     <oasis:colspec colnum="12" colname="col12" align="right"/>
     <oasis:colspec colnum="13" colname="col13" align="right"/>
     <oasis:colspec colnum="14" colname="col14" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1">Types</oasis:entry>
         <oasis:entry colname="col2">No.</oasis:entry>
         <oasis:entry rowsep="1" namest="col3" nameend="col5" align="center" colsep="1"><inline-formula><mml:math id="M171" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" namest="col6" nameend="col8" align="center" colsep="1">Bias (m<inline-formula><mml:math id="M172" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M173" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>) </oasis:entry>
         <oasis:entry rowsep="1" namest="col9" nameend="col11" align="center" colsep="1">RMSE (m<inline-formula><mml:math id="M174" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M175" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>) </oasis:entry>
         <oasis:entry rowsep="1" namest="col12" nameend="col14" align="center">ubRMSE (m<inline-formula><mml:math id="M176" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M177" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>) </oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">med</oasis:entry>
         <oasis:entry colname="col4">min</oasis:entry>
         <oasis:entry colname="col5">max</oasis:entry>
         <oasis:entry colname="col6">med</oasis:entry>
         <oasis:entry colname="col7">min</oasis:entry>
         <oasis:entry colname="col8">max</oasis:entry>
         <oasis:entry colname="col9">med</oasis:entry>
         <oasis:entry colname="col10">min</oasis:entry>
         <oasis:entry colname="col11">max</oasis:entry>
         <oasis:entry colname="col12">med</oasis:entry>
         <oasis:entry colname="col13">min</oasis:entry>
         <oasis:entry colname="col14">max</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">All stations</oasis:entry>
         <oasis:entry colname="col2">1145</oasis:entry>
         <oasis:entry colname="col3">0.64</oasis:entry>
         <oasis:entry colname="col4"><inline-formula><mml:math id="M178" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>1.0</oasis:entry>
         <oasis:entry colname="col5">1.0</oasis:entry>
         <oasis:entry colname="col6">0.00</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M179" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.39</oasis:entry>
         <oasis:entry colname="col8">0.34</oasis:entry>
         <oasis:entry colname="col9">0.075</oasis:entry>
         <oasis:entry colname="col10">0.005</oasis:entry>
         <oasis:entry colname="col11">0.397</oasis:entry>
         <oasis:entry colname="col12">0.055</oasis:entry>
         <oasis:entry colname="col13">0.000</oasis:entry>
         <oasis:entry colname="col14">0.188</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Selected stations</oasis:entry>
         <oasis:entry colname="col2">715</oasis:entry>
         <oasis:entry colname="col3">0.74</oasis:entry>
         <oasis:entry colname="col4">0.11</oasis:entry>
         <oasis:entry colname="col5">0.99</oasis:entry>
         <oasis:entry colname="col6">0.00</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M180" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.21</oasis:entry>
         <oasis:entry colname="col8">0.21</oasis:entry>
         <oasis:entry colname="col9">0.068</oasis:entry>
         <oasis:entry colname="col10">0.019</oasis:entry>
         <oasis:entry colname="col11">0.220</oasis:entry>
         <oasis:entry colname="col12">0.052</oasis:entry>
         <oasis:entry colname="col13">0.017</oasis:entry>
         <oasis:entry colname="col14">0.132</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Forests</oasis:entry>
         <oasis:entry colname="col2">35</oasis:entry>
         <oasis:entry colname="col3">0.73</oasis:entry>
         <oasis:entry colname="col4">0.11</oasis:entry>
         <oasis:entry colname="col5">0.85</oasis:entry>
         <oasis:entry colname="col6">0.02</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M181" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.14</oasis:entry>
         <oasis:entry colname="col8">0.18</oasis:entry>
         <oasis:entry colname="col9">0.079</oasis:entry>
         <oasis:entry colname="col10">0.041</oasis:entry>
         <oasis:entry colname="col11">0.185</oasis:entry>
         <oasis:entry colname="col12">0.061</oasis:entry>
         <oasis:entry colname="col13">0.026</oasis:entry>
         <oasis:entry colname="col14">0.091</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Shrublands</oasis:entry>
         <oasis:entry colname="col2">16</oasis:entry>
         <oasis:entry colname="col3">0.61</oasis:entry>
         <oasis:entry colname="col4">0.46</oasis:entry>
         <oasis:entry colname="col5">0.79</oasis:entry>
         <oasis:entry colname="col6"><inline-formula><mml:math id="M182" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.01</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M183" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.07</oasis:entry>
         <oasis:entry colname="col8">0.10</oasis:entry>
         <oasis:entry colname="col9">0.043</oasis:entry>
         <oasis:entry colname="col10">0.027</oasis:entry>
         <oasis:entry colname="col11">0.116</oasis:entry>
         <oasis:entry colname="col12">0.031</oasis:entry>
         <oasis:entry colname="col13">0.022</oasis:entry>
         <oasis:entry colname="col14">0.056</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Savannas</oasis:entry>
         <oasis:entry colname="col2">185</oasis:entry>
         <oasis:entry colname="col3">0.77</oasis:entry>
         <oasis:entry colname="col4">0.24</oasis:entry>
         <oasis:entry colname="col5">0.97</oasis:entry>
         <oasis:entry colname="col6">0.01</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M184" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.17</oasis:entry>
         <oasis:entry colname="col8">0.18</oasis:entry>
         <oasis:entry colname="col9">0.070</oasis:entry>
         <oasis:entry colname="col10">0.019</oasis:entry>
         <oasis:entry colname="col11">0.194</oasis:entry>
         <oasis:entry colname="col12">0.051</oasis:entry>
         <oasis:entry colname="col13">0.017</oasis:entry>
         <oasis:entry colname="col14">0.132</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Grassland</oasis:entry>
         <oasis:entry colname="col2">327</oasis:entry>
         <oasis:entry colname="col3">0.75</oasis:entry>
         <oasis:entry colname="col4">0.26</oasis:entry>
         <oasis:entry colname="col5">0.99</oasis:entry>
         <oasis:entry colname="col6">0.00</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M185" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.21</oasis:entry>
         <oasis:entry colname="col8">0.21</oasis:entry>
         <oasis:entry colname="col9">0.067</oasis:entry>
         <oasis:entry colname="col10">0.019</oasis:entry>
         <oasis:entry colname="col11">0.220</oasis:entry>
         <oasis:entry colname="col12">0.053</oasis:entry>
         <oasis:entry colname="col13">0.018</oasis:entry>
         <oasis:entry colname="col14">0.083</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Urban</oasis:entry>
         <oasis:entry colname="col2">12</oasis:entry>
         <oasis:entry colname="col3">0.68</oasis:entry>
         <oasis:entry colname="col4">0.34</oasis:entry>
         <oasis:entry colname="col5">0.87</oasis:entry>
         <oasis:entry colname="col6">0.00</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M186" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.15</oasis:entry>
         <oasis:entry colname="col8">0.13</oasis:entry>
         <oasis:entry colname="col9">0.068</oasis:entry>
         <oasis:entry colname="col10">0.027</oasis:entry>
         <oasis:entry colname="col11">0.152</oasis:entry>
         <oasis:entry colname="col12">0.050</oasis:entry>
         <oasis:entry colname="col13">0.025</oasis:entry>
         <oasis:entry colname="col14">0.067</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Croplands</oasis:entry>
         <oasis:entry colname="col2">130</oasis:entry>
         <oasis:entry colname="col3">0.73</oasis:entry>
         <oasis:entry colname="col4">0.29</oasis:entry>
         <oasis:entry colname="col5">0.89</oasis:entry>
         <oasis:entry colname="col6">0.00</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M187" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.20</oasis:entry>
         <oasis:entry colname="col8">0.21</oasis:entry>
         <oasis:entry colname="col9">0.065</oasis:entry>
         <oasis:entry colname="col10">0.030</oasis:entry>
         <oasis:entry colname="col11">0.214</oasis:entry>
         <oasis:entry colname="col12">0.049</oasis:entry>
         <oasis:entry colname="col13">0.026</oasis:entry>
         <oasis:entry colname="col14">0.106</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">Barren</oasis:entry>
         <oasis:entry colname="col2">10</oasis:entry>
         <oasis:entry colname="col3">0.57</oasis:entry>
         <oasis:entry colname="col4">0.27</oasis:entry>
         <oasis:entry colname="col5">0.82</oasis:entry>
         <oasis:entry colname="col6"><inline-formula><mml:math id="M188" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.03</oasis:entry>
         <oasis:entry colname="col7"><inline-formula><mml:math id="M189" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>0.07</oasis:entry>
         <oasis:entry colname="col8">0.08</oasis:entry>
         <oasis:entry colname="col9">0.050</oasis:entry>
         <oasis:entry colname="col10">0.028</oasis:entry>
         <oasis:entry colname="col11">0.090</oasis:entry>
         <oasis:entry colname="col12">0.034</oasis:entry>
         <oasis:entry colname="col13">0.025</oasis:entry>
         <oasis:entry colname="col14">0.056</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><?xmltex \gdef\@currentlabel{6}?></table-wrap>

</sec>
<sec id="Ch1.S4.SS3">
  <label>4.3</label><title>Validation of the GLASS SM product on independent networks</title>
      <p id="d1e4674">Using the XGBoost model developed above, a global 1 km spatiotemporally
continuous soil moisture product (GLASS SM) was generated. To intuitively
demonstrate the ability of this product to capture the temporal
variations in soil moisture over an unknown space, four independent networks
under different climatic and environmental conditions were selected, and the
time-series curves of the GLASS and measured soil moisture for these
networks were compared. Considering the high spatiotemporal variability of
surface soil moisture and the scale differences between point-scale
observations and the 1 km GLASS SM product, the mean measured soil moisture
curve was first calculated by averaging soil moisture curves from all
stations within a network and then compared with the mean predicted soil
moisture curve calculated using all corresponding pixels of the GLASS SM
product within that network. Moreover, as an input variable of the 1 km soil
moisture estimation model, the time-series curves of the ERA5-Land
reanalysis soil moisture product over the four independent networks were
also extracted as a reference.</p>
      <p id="d1e4677">In most cases, the GLASS soil moisture curves were much closer to the
measured values than the time-series curves of the ERA5-Land reanalysis soil
moisture product in both the YA and YB soil moisture networks (Fig. 6a,
b). The <inline-formula><mml:math id="M190" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values between the GLASS and measured soil moisture for these
two networks were 0.84 and 0.89, respectively, which were slightly higher
than the ERA5-Land soil moisture (0.80 and 0.84); whereas the ubRMSE values
were 0.048 and 0.034 m<inline-formula><mml:math id="M191" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M192" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively, slightly lower than the
ERA5-Land soil moisture product (0.052 and 0.044 m<inline-formula><mml:math id="M193" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M194" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>).
Accordingly, over these two relatively dense soil moisture networks, the
1 km GLASS SM product can basically capture the dynamics of measured soil
moisture. However, underestimates occurred at some high-value intervals on
the measured soil moisture curves, which may be caused by nearby irrigation
at some stations within agricultural regions, where the GLASS SM product may
not be able to capture such patterns, given that irrigation is usually not
uniformly distributed in space.</p>
      <p id="d1e4729">For the Fort Cobb and Little Washita soil moisture networks, both the GLASS
and ERA5-Land soil moisture estimates basically captured the dynamics of
measured soil moisture (Fig. 6c, d). Specifically, the <inline-formula><mml:math id="M195" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values between
the mean GLASS and measured soil moisture for these two networks were 0.69
and 0.76, respectively, slightly lower than the ERA5-Land soil moisture
product (0.74 and 0.77). However, both the GLASS and ERA5-Land reanalysis
soil moisture products showed a large positive bias throughout most of the
observation period, particularly in the Little Washita network. This is
likely because these two soil moisture networks cover a relatively large
watershed containing only a few stations. Nevertheless, the ubRMSE values
between the mean GLASS and measured soil moisture values for these two
networks were 0.037 and 0.033 m<inline-formula><mml:math id="M196" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M197" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively, which were
significantly lower than those for the ERA5-Land soil moisture (0.047 and
0.046 m<inline-formula><mml:math id="M198" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M199" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>). Overall, the above results suggested that the derived
product can accurately capture the temporal variations of in situ soil
moisture under different climatic conditions. Further, the GLASS SM product
achieved similar <inline-formula><mml:math id="M200" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values to the ERA5-Land product across these networks,
with the <inline-formula><mml:math id="M201" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values ranging from 0.69 to 0.89 and the ubRMSE values ranging from
0.033 to 0.048 m<inline-formula><mml:math id="M202" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M203" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F6" specific-use="star"><?xmltex \currentcnt{6}?><?xmltex \def\figurename{Figure}?><label>Figure 6</label><caption><p id="d1e4820">Time-series plots of the mean in situ, ERA5-Land, and GLASS soil
moisture for four independent soil moisture networks.</p></caption>
          <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f06.jpg"/>

        </fig>

</sec>
<?pagebreak page2069?><sec id="Ch1.S4.SS4">
  <label>4.4</label><title>Comparison with existing global soil moisture products</title>
      <p id="d1e4838">After producing the global 1 km spatiotemporally continuous GLASS SM
product, it was compared with two global microwave soil moisture products
for spatiotemporal consistency. The first product selected for comparison
was SPL2SMAP_S, the first publicly released global soil
moisture product at a spatial resolution of 1 km. Because the
SPL2SMAP_S 1 km product has a temporal resolution of 12 d
over most global areas and as it has many spatial gaps at the daily scale,
spatial synthesis of the SPL2SMAP_S dataset was conducted during a
12 d period with relatively high spatial coverage before comparison.
Figure 7 shows the spatial distribution of the SPL2SMAP_S
1 km soil moisture product, synthesized from 3 to 15 October 2016, alongside
the 1 km spatiotemporally continuous GLASS SM map for 9 October 2016. Here,
it can be seen that the 12 d synthetic SPL2SMAP_S soil
moisture product still has large spatial gaps (e.g., the western continental
United States, western China, and southwestern Australia), whereas the GLASS
SM product has a substantially more complete spatial coverage (except for
the high-latitude regions during the cold seasons). With regard to the
spatial distribution characteristics, the two soil moisture products with 1 km
resolution exhibit a high level of consistency, with higher soil moisture
levels found in the tropics, eastern USA, and southeastern China and with lower
levels observed in deserts (e.g., Sahara) and other semiarid regions.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F7"><?xmltex \currentcnt{7}?><?xmltex \def\figurename{Figure}?><label>Figure 7</label><caption><p id="d1e4843"><bold>(a)</bold> The 12 d synthetic SPL2SMAP_S 1 km soil moisture
map from 3 to 15 October 2016, and <bold>(b)</bold> the 1 km spatiotemporally continuous
GLASS SM map on 9 October 2016.</p></caption>
          <?xmltex \igopts{width=241.848425pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f07.jpg"/>

        </fig>

      <p id="d1e4857">To quantitatively investigate the spatial consistency between these two 1 km
soil moisture products, spatial <inline-formula><mml:math id="M204" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSD between them were calculated for
each 12 d period of 2016 using collocated pixels, after removing soil moisture
estimates larger than 0.6 m<inline-formula><mml:math id="M205" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M206" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> from the SPL2SMAP_S product. As displayed in Fig. 8a, the spatial <inline-formula><mml:math id="M207" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> (orange line) between
the GLASS and SPL2SMAP_S products ranges from 0.61 to 0.67,
with a median value of 0.62, partially affected by the discontinuous spatial
coverage of the SPL2SMAP_S product. The spatial RMSD (orange
dots) between the two 1 km products in 2016 ranges from 0.098 to 0.106 m<inline-formula><mml:math id="M208" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M209" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, and the relatively large RMSD values may be attributed to
the greater spatial heterogeneity (e.g., terrain and soil texture) at fine
scales which could cause large disparities in soil moisture estimates from
different algorithms. Overall, both qualitative and quantitative comparisons
suggested a good and stable spatial consistency between the 1 km GLASS and
SPL2SMAP_S microwave soil moisture products.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F8" specific-use="star"><?xmltex \currentcnt{8}?><?xmltex \def\figurename{Figure}?><label>Figure 8</label><caption><p id="d1e4920">Time-series plots of the spatial <inline-formula><mml:math id="M210" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> (lines) and RMSD (dots)
calculated between <bold>(a)</bold> the GLASS and SPL2SMAP_S soil moisture
products at 1 km resolution and <bold>(b)</bold> the ESA CCI and three resampled soil
moisture products (ERA5-Land, GLASS, and SPL2SMAP_S) at
0.25<inline-formula><mml:math id="M211" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> resolution in 2016.</p></caption>
          <?xmltex \igopts{width=355.659449pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f08.png"/>

        </fig>

      <p id="d1e4951">The second global product selected for comparison was the widely used ESA
CCI combined soil moisture dataset with a spatial resolution of
0.25<inline-formula><mml:math id="M212" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>. Because the CCI soil moisture product has a daily temporal
resolution and more complete spatial coverage, more quantitative analyses
can be conducted when comparing with the 1 km spatiotemporally continuous
GLASS SM product. Figure 9 shows the spatial distribution of the CCI
active–passive microwave combined soil moisture and GLASS SM resampled to
0.25<inline-formula><mml:math id="M213" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> for 4 d from different seasons in 2016, as well as the
corresponding scatterplots of these two soil moisture products. The high
spatial consistency between the CCI soil moisture product and resampled
GLASS SM product on different dates is readily apparent, as both products
display lower soil moisture values in arid regions, including the western
USA, northern and southern Africa, the Middle East, central and western Asia,
and Australia, and higher soil moisture values in tropical and temperate
regions, such as central Africa, southern Asia, the eastern USA, and
southeastern China. Although the CCI product incorporates a variety of active
and passive microwave soil moisture products, its spatial coverage remains
incomplete, partly due to observation gaps of the sensors and the physical
limitations of microwave soil moisture retrieval algorithms
(Dorigo et al., 2017),
such as failing to provide accurate soil moisture predictions on densely
vegetated land surfaces (e.g., the Amazon River and Congo basins). In
contrast, the GLASS SM product shows greater spatial integrity, except at
high latitudes in cold seasons due to low temperatures and frozen soils.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F9" specific-use="star"><?xmltex \currentcnt{9}?><?xmltex \def\figurename{Figure}?><label>Figure 9</label><caption><p id="d1e4974"><bold>(a–d)</bold> ESA CCI combined soil moisture maps at 0.25<inline-formula><mml:math id="M214" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>,
<bold>(e–h)</bold> the corresponding spatiotemporally continuous GLASS SM maps resampled
to 0.25<inline-formula><mml:math id="M215" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>, and <bold>(i–l)</bold> scatterplots of the two products for four
Julian dates (90, 180, 270, 360) selected from different seasons of 2016.</p></caption>
          <?xmltex \igopts{width=384.112205pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f09.jpg"/>

        </fig>

      <p id="d1e5009">As shown in Fig. 8b, the daily spatial <inline-formula><mml:math id="M216" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> between the resampled GLASS and
ESA CCI soil moisture products at 0.25<inline-formula><mml:math id="M217" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> resolution in 2016 ranges
from 0.72 to 0.86, with a median value of 0.82, indicating that the two
products exhibit high spatial consistency across the seasons. As a
comparison, the spatial <inline-formula><mml:math id="M218" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSD between the CCI and two other resampled
soil moisture products (ERA5-Land and SPL2SMAP_S) were also
calculated and plotted. It is clear that the spatial <inline-formula><mml:math id="M219" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> curves of the
resampled ERA5-Land (blue) and GLASS (orange) at 0.25<inline-formula><mml:math id="M220" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> only differ
slightly, which is to be expected given that the coarse-scale ERA5-Land<?pagebreak page2070?> soil
moisture was used to provide background soil moisture information for our
model and given that it achieved the highest importance score among all the input
variables. Both curves exhibit significant seasonal variation, with higher
spatial <inline-formula><mml:math id="M221" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values in spring and winter than in summer or autumn, possibly
related to the larger differences between the two resampled products (GLASS
and ERA5-Land) and CCI over high latitudes. However, the spatial RMSD curves
of the ERA5-Land and GLASS differ significantly. While the blue dotted line
(RMSD between CCI and ERA5-Land) exhibits an opposite seasonal pattern to
the <inline-formula><mml:math id="M222" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> curves, with RMSD ranging widely from 0.086 to 0.12 m<inline-formula><mml:math id="M223" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M224" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>,
the orange dotted line (RMSD between CCI and GLASS) is more stable, with
RMSD ranging from 0.068 to 0.087 m<inline-formula><mml:math id="M225" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M226" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. Besides, as also shown in
Fig. 8b, although the resampled SPL2SMAP_S soil moisture
product has the most stable spatial <inline-formula><mml:math id="M227" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSD curves (gray), it achieves
relatively lower spatial <inline-formula><mml:math id="M228" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values and larger spatial RMSD values than those
of the resampled GLASS product at 0.25<inline-formula><mml:math id="M229" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>, suggesting its relatively
lower level of spatial consistency with the CCI product. This is
surprising considering that both the SPL2SMAP_S and CCI soil
moisture products were derived from microwave satellite observations, and a
possible cause for this could be the discontinuous spatial coverage of the
SPL2SMAP_S product.</p>
      <?pagebreak page2071?><p id="d1e5132">Note that the GLASS SM product displays a general underestimation relative
to the CCI combined soil moisture (Fig. 9i–l). Although the
overestimation of the CCI soil moisture product has been reported in
a previous study, particularly for equatorial (savanna) regions
(Al-Yaari et al.,
2019), the GLASS SM product may also contain some biases, which jointly
contribute to the RMSD between them. Figure 10 shows a zoomed-in comparison
between the 1 km GLASS and 0.25<inline-formula><mml:math id="M230" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> ESA CCI microwave soil moisture
product in western China on 28 June 2016, with the corresponding
0.1<inline-formula><mml:math id="M231" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> ERA5-Land reanalysis soil moisture product, which is one of
the main inputs to the XGBoost model, also shown as a reference. In general,
the GLASS product exhibits spatial consistency with both coarse-scale soil
moisture products, with lower soil moisture levels in the Junggar Basin,
Tarim Basin, Qaidam Basin, and western part of the Tibetan Plateau, and
higher soil moisture levels in the Tianshan Mountains, Ili River valley, and
southeastern part of the plateau where the vegetation is also much denser.
Specifically, in the southeastern Tibetan Plateau, the GLASS and CCI soil
moisture products show higher consistency, while the ERA5-Land soil moisture
product is suspected to be underestimated. Moreover, it is clear that the
1 km GLASS SM product is not only spatially complete but also contains more
spatial details which can well reflect the distribution patterns of terrain
and vegetation.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F10" specific-use="star"><?xmltex \currentcnt{10}?><?xmltex \def\figurename{Figure}?><label>Figure 10</label><caption><p id="d1e5156">Zoomed-in comparison of the <bold>(a)</bold> 1 km GLASS, <bold>(b)</bold> 0.25<inline-formula><mml:math id="M232" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>
ESA CCI, and <bold>(c)</bold> 0.1<inline-formula><mml:math id="M233" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> ERA5-Land soil moisture products in western
China on 28 June 2016 (the 180th day).</p></caption>
          <?xmltex \igopts{width=355.659449pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f10.jpg"/>

        </fig>

      <p id="d1e5192">In addition to the spatial consistency analysis described above, the
temporal consistency between the CCI and the spatiotemporally continuous GLASS
SM product was also explored. Specifically, for each pixel of these two
products with <inline-formula><mml:math id="M234" display="inline"><mml:mo>&gt;</mml:mo></mml:math></inline-formula> 30 d of concurrent predictions, the <inline-formula><mml:math id="M235" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSD
between the time-series soil moisture predictions were calculated separately
for 2016, and the spatial distribution of these two metrics is shown in Fig. 11. The correlation between the two products was high in most areas, except the
Sahara, high latitudes, and some localized regions. The relatively
low or even negative <inline-formula><mml:math id="M236" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values between the two products in the Sahara
are likely due to the fact that soil moisture in this region is close to zero, and a
small difference in temporal variation may lead to poor correlation. It can
also be seen from Fig. 11b that the RMSD values between the two products
in the Sahara were rather small. The relatively low <inline-formula><mml:math id="M237" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values between
the two products at high latitudes may be attributed to the irregular
prediction frequency of the CCI product at high latitudes and the rapid
change in soil moisture during the freeze–thaw transition period in this
region, which possibly cause larger errors in both products and thus
increased temporal inconsistency. Greater differences between soil moisture
products at high latitudes have also been found elsewhere
(Wang et al., 2021). Further, no obvious
patterns were revealed regarding the distribution of RMSD between the two
soil moisture products, as the regions with relatively large RMSD values
were rather scattered.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F11"><?xmltex \currentcnt{11}?><?xmltex \def\figurename{Figure}?><label>Figure 11</label><caption><p id="d1e5225">The spatial distribution of <bold>(a)</bold> <inline-formula><mml:math id="M238" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and <bold>(b)</bold> RMSD between the ESA CCI
combined soil moisture product and the spatiotemporally continuous GLASS SM
product in 2016.</p></caption>
          <?xmltex \igopts{width=241.848425pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f11.jpg"/>

        </fig>

</sec>
</sec>
<sec id="Ch1.S5">
  <label>5</label><title>Discussion</title>
      <p id="d1e5257">To address the lack of high-resolution, spatiotemporally continuous global
soil moisture products, this study developed a global 1 km soil moisture
estimation framework which integrated multisource datasets using an XGBoost
model. This framework was adapted from the 30 m soil moisture estimation
framework proposed by Zhang et al. (2022b), in which the Landsat 8
surface reflectance and thermal observations were replaced with the
spatiotemporally continuous GLASS albedo, LST, and LAI products to mitigate
the influence of clouds on the spatial continuity and temporal resolution of
the soil moisture product. Meanwhile, the relatively high temporal resolution of
GLASS products allows for many more collocated training samples, which are
expected to alleviate the underestimation of the original 30 m model at high
soil moisture levels. In addition, considering the relatively large-scale
differences between point-scale in situ soil moisture datasets and GLASS
products compared to Landsat datasets, the TC method was adopted to select
the representative soil moisture stations, and their measurements were used
as the training target of the model. Results showed that the 1 km soil
moisture estimation model achieved satisfactory overall accuracy, and
training the model with representative stations selected by the TC method
can considerably improve its performance over unknown time and space.</p>
      <p id="d1e5260">Most previous machine-learning-based studies aimed at soil moisture
estimation have divided the samples from all observation locations and times
randomly into training and test datasets. In this case, the model's accuracy on
the random test samples may seem rather high as a result of model
overfitting, because these test samples may not be spatially or temporally
independent of those in the training dataset, and part of the site-specific
information is disclosed to the model. Therefore, model performance must
also be fully evaluated using samples from unknown time or space. Senyurek
et al. (2020) trained a
random forest model using the Cyclone Global Navigation Satellite System
observations, as well as the ISMN in situ soil moisture and other
geophysical datasets, which was then fully evaluated using 5-fold
cross-validation, site-independent, and year-based techniques. Before the
model training process, several critical screening conditions were applied
to select 106 stations from the 234 ISMN soil moisture stations over the
CONUS, and the 5-fold cross-validation <inline-formula><mml:math id="M239" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE of the random forest model
were 0.89 and 0.052 m<inline-formula><mml:math id="M240" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M241" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively, whereas the
site-independent cross-validation <inline-formula><mml:math id="M242" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE values were 0.64 and 0.088 m<inline-formula><mml:math id="M243" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M244" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. Similarly, the overall <inline-formula><mml:math id="M245" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE of the
1 km GLASS SM model for the random and site-independent test samples were
0.941 and 0.038 m<inline-formula><mml:math id="M246" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M247" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula> and 0.715 and 0.079 m<inline-formula><mml:math id="M248" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M249" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>,
respectively. Notably, Senyurek et al. (2020) attributed the
relatively lower site-independent validation accuracy to the fact that
different soil moisture stations have distinct climatology, which is
difficult for the machine learning model to capture without bias.<?pagebreak page2072?> Instead,
we argue that the high validation accuracy achieved by the machine learning
models on the random test samples is most likely a result of overfitting,
while the relatively lower site-independent validation accuracy is much more
realistic. The authors further suggested that model performance could be
improved by increasing the representativeness of various land surface
conditions within training datasets. Although a representative training
dataset is essential for data-driven machine learning models, it was found
here that a large prediction bias existed across all land cover types, and
the resulting model performance did not vary significantly among them.
Therefore, it was concluded here that the site-specific biases induced by
scale differences rather than the uneven distribution of land cover types
among samples are the major cause of the decreased overall accuracy of the
model over unknown time and space.</p>
      <p id="d1e5369">As emphasized in Gruber et al. (2020), despite the fact that
downscaled soil moisture products usually provide more spatial details
visually, they may not reflect real soil moisture variations, and it is thus
necessary to estimate the spatial <inline-formula><mml:math id="M250" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> for the downscaled products, in addition
to temporal analyses. Then, Crow et al. (2022) defined the success of a downscaling
algorithm as achieving either better temporal accuracy or spatial skill than
the original coarse-scale product that is interpolated onto the fine-scale
spatial grid. As can be seen from Fig. 5a, the temporal <inline-formula><mml:math id="M251" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values achieved
by the XGBoost model at representative stations are similar to those of the
coarse-scale ERA5-Land soil moisture product, and Fig. 8b shows that the
GLASS and ERA5-Land products achieved similar spatial <inline-formula><mml:math id="M252" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values when they are
both resampled to 0.25<inline-formula><mml:math id="M253" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> resolution. Therefore, to identify whether
the 1 km GLASS SM product actually has added value with respect to the
0.1<inline-formula><mml:math id="M254" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula> ERA5-Land product, we also calculated the spatial <inline-formula><mml:math id="M255" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> for the
XGBoost model on a daily basis using soil moisture measurements from
representative stations and then<?pagebreak page2073?> compared it with that of the ERA5-Land
product interpolated onto the 1 km grid. To make the comparison more
rigorous, soil moisture estimated using the 5-fold cross-validation method
from the model was adopted to calculate the spatial <inline-formula><mml:math id="M256" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> instead of the final
GLASS SM product (yielding even better results). As displayed in Fig. 12,
the spatial <inline-formula><mml:math id="M257" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values achieved by the XGBoost model at representative
stations improve significantly compared to those of the ERA5-Land product,
with the median spatial <inline-formula><mml:math id="M258" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> increasing from 0.60 to 0.66, and in most cases,
the difference in spatial <inline-formula><mml:math id="M259" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> (R_diff) between the XGBoost
model and ERA5-Land product is positive, with a median value of 0.06.
Accordingly, it is reasonable to believe that the 1 km GLASS SM product does
provide more spatial information which reflects fine-scale soil moisture
variations rather than just adding ineffective details.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F12"><?xmltex \currentcnt{12}?><?xmltex \def\figurename{Figure}?><label>Figure 12</label><caption><p id="d1e5450">Boxplot of the spatial <inline-formula><mml:math id="M260" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> calculated for the XGBoost model on a
daily basis using soil moisture measurements from representative stations,
in comparison with those of the ERA5-Land product. The difference in spatial
<inline-formula><mml:math id="M261" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> between the XGBoost model and ERA5-Land product is denoted as
R_diff.
</p></caption>
        <?xmltex \igopts{width=227.622047pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f12.png"/>

      </fig>

      <p id="d1e5473">To date, several studies have attempted to further improve the accuracy of
machine-learning-based soil moisture estimation models through different
strategies. Abbaszadeh et al. (2019) classified
in situ soil moisture stations within the CONUS according to soil texture
class, developing 12 distinct random forest models to downscale the SMAP
36 km soil moisture product using atmospheric, geophysical, and in situ soil
moisture datasets. Their downscaled 1 km soil moisture product achieved good
overall validation accuracy on both core validation sites and 300 sparse
soil moisture stations, with the proposed downscaling approach outperforming
the uniform downscaling approach. Similarly, Karthikeyan and Mishra (2021) clustered CONUS
into 11 homogeneous regions using a <inline-formula><mml:math id="M262" display="inline"><mml:mi>k</mml:mi></mml:math></inline-formula>-means algorithm based on a range of
climate and landscape variables, before training an XGBoost model for each
region and soil layer to downscale the SMAP Level 4 soil moisture product.
Validation at 79 independent soil moisture stations showed that the
downscaled product successfully captured temporal variations of measured
soil moisture. We have also attempted to classify the ISMN stations based on
their soil texture classes or climatic and environmental properties prior
to separately developing the models; however, the overall prediction
accuracy did not seem to improve significantly.</p>

      <?xmltex \floatpos{t}?><fig id="Ch1.F13" specific-use="star"><?xmltex \currentcnt{13}?><?xmltex \def\figurename{Figure}?><label>Figure 13</label><caption><p id="d1e5485">Scatterplots of mean measured and predicted soil moisture from
different models on the <bold>(a–b)</bold> YA and <bold>(c–d)</bold> YB soil moisture networks.
Point colors indicate the probability density, whereas the dashed red line
is the linear regression, and the solid black line is the <inline-formula><mml:math id="M263" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>:</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:math></inline-formula> relationship.</p></caption>
        <?xmltex \igopts{width=341.433071pt}?><graphic xlink:href="https://essd.copernicus.org/articles/15/2055/2023/essd-15-2055-2023-f13.png"/>

      </fig>

      <p id="d1e5512">Moreover, to mitigate the impacts of scale differences and improve the
prediction accuracy, we also trained a distinct XGBoost model (Model 2)
using the average soil moisture of all 30 m pixels within a 1 km pixel where
the station was located as the target variable, which was calculated using
the 30 m soil moisture estimation model developed by Zhang et al. (2022b). The overall accuracies
of Model 2 and the previously developed model trained directly using in situ
soil moisture (Model 1) on the YA and YB networks were then compared (Fig. 13). Here, it was found that Model 1 achieved good overall prediction
accuracy for both networks. But as also shown in Fig. 6, Model 1 showed
slight underestimation at higher soil moisture levels, especially in the YA
region. In contrast, while Model 2 obtained similar <inline-formula><mml:math id="M264" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values as Model 1, it
exhibited much more severe underestimation at higher soil moisture levels in
both the YA and YB networks. This may be attributed to the lack of high soil
moisture samples in the original 30 m soil moisture estimation model, which
were even scarcer after averaging to 1 km. To further improve Model 2
accuracy, uniform global sampling can be performed to generate a large
number of 1 km averaged soil moisture samples, but this would be rather
labor intensive. Alternatively, the global 1 km GLASS SM product generated
using Model 1 accurately captured the temporal variations of the in situ
soil moisture, and it exhibited high spatiotemporal consistency with microwave
soil moisture products, although some site-specific biases may exist while
validating the product against sparse soil moisture stations. Future studies
could focus on mitigating the impacts of scale differences on the machine
learning models, either by deploying denser soil moisture monitoring
networks, or by further improving the accuracy of high-resolution (e.g., 30 m) but often spatiotemporally discontinuous soil moisture products, and then
training the 1 km spatiotemporally continuous GLASS SM model directly using
the higher-resolution soil moisture products.</p>
</sec>
<sec id="Ch1.S6">
  <label>6</label><title>Data availability</title>
      <p id="d1e5530">The global daily 1 km spatiotemporally continuous soil moisture product
(GLASS SM) from 2000 to 2020 is freely available at
<uri>http://glass.umd.edu/soil_moisture/</uri> (last access: 12 May 2023). In addition, for users'
convenience, the annual average global soil moisture dataset at 1 km
resolution was also generated, which can be downloaded from
<ext-link xlink:href="https://doi.org/10.5281/zenodo.7172664" ext-link-type="DOI">10.5281/zenodo.7172664</ext-link> (Zhang et al.,
2022a). Note that this product represents the volumetric<?pagebreak page2074?> water content in
the uppermost soil layer (0–5 cm). Files are stored in the sinusoidal
projection and “GeoTIFF” format.</p>
</sec>
<sec id="Ch1.S7" sec-type="conclusions">
  <label>7</label><title>Conclusions</title>
      <p id="d1e5547">A global 1 km spatiotemporally continuous soil moisture product (GLASS SM)
was derived here using an XGBoost ensemble learning model that integrated
multisource datasets, including remotely sensed GLASS products, ERA5-Land
reanalysis products, ground-based ISMN soil moisture, and static
auxiliary datasets. Validation of the XGBoost model was conducted using
three complementary validation strategies, and the GLASS SM product was also
evaluated across four independent networks, demonstrating the product's
strong capacity to capture temporal dynamics of measured soil moisture. This
global 1 km soil moisture product also exhibited high spatiotemporal
consistency with two global microwave soil moisture products. Overall, the
main findings of the study can be summarized as follows:
<list list-type="order"><list-item>
      <p id="d1e5552">When the samples from all stations and years were randomly divided into
training and test datasets, the XGBoost model achieved a high accuracy on
the random test samples. By using the TC method to select representative
stations, the validation accuracy of the model was further improved
significantly, with an overall <inline-formula><mml:math id="M265" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and RMSE of 0.941 and 0.038 m<inline-formula><mml:math id="M266" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M267" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively. Nevertheless, such high accuracy achieved by the
model on the random test samples is clearly a result of overfitting.</p></list-item><list-item>
      <p id="d1e5584">Training the model with representative stations selected by the TC
method also considerably improved its performance for site- or
year-independent samples (i.e., over unknown time and space). The overall
validation accuracy of the model trained using representative stations on
the site-independent test samples, which was least likely to be overfitted,
was an <inline-formula><mml:math id="M268" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> of 0.715 and RMSE of 0.079 m<inline-formula><mml:math id="M269" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M270" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>. Compared to the model
developed without station filtering, the accuracies of the model trained
using representative stations improved significantly on most stations, with
the median <inline-formula><mml:math id="M271" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> and ubRMSE of the model for each station increasing from 0.64
to 0.74 and decreasing from 0.055 to 0.052 m<inline-formula><mml:math id="M272" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M273" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>, respectively.</p></list-item><list-item>
      <p id="d1e5645">The time-series validation results of the 1 km GLASS SM product over
four independent networks indicated that the product can accurately capture
temporal variations in measured soil moisture under different<?pagebreak page2075?> climatic
conditions. The GLASS SM product achieved similar <inline-formula><mml:math id="M274" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values as the ERA5-Land
product, with the <inline-formula><mml:math id="M275" display="inline"><mml:mi>R</mml:mi></mml:math></inline-formula> values ranging from 0.69 to 0.89 and the ubRMSE values ranging
from 0.033 to 0.048 m<inline-formula><mml:math id="M276" display="inline"><mml:msup><mml:mi/><mml:mn mathvariant="normal">3</mml:mn></mml:msup></mml:math></inline-formula> m<inline-formula><mml:math id="M277" display="inline"><mml:msup><mml:mi/><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">3</mml:mn></mml:mrow></mml:msup></mml:math></inline-formula>.</p></list-item><list-item>
      <p id="d1e5684">Compared with the 1 km SMAP/Sentinel-1 SPL2SMAP_S soil
moisture product and the ESA CCI active–passive microwave combined soil
moisture product at 0.25<inline-formula><mml:math id="M278" display="inline"><mml:msup><mml:mi/><mml:mo>∘</mml:mo></mml:msup></mml:math></inline-formula>, the global 1 km spatiotemporally
continuous soil moisture product generated here had a more complete spatial
coverage, and it exhibited high spatiotemporal consistency with these two
products.</p></list-item></list>
The long-term (2000–2020) global GLASS SM product with high spatiotemporal
resolution (1 km, daily) and reliable accuracy generated here can benefit
climate change studies, hydrological modeling, and agricultural applications
at regional and global scales. It is also a valuable complement to currently
released global microwave and model-simulated soil moisture datasets. Future
studies could consider further improving and fully evaluating the accuracy
of the GLASS SM product.</p>
</sec>

      
      </body>
    <back><notes notes-type="authorcontribution"><title>Author contributions</title>

      <p id="d1e5701">SL and YZ developed the methodology and designed the
experiments. YZ, HM, BL, JX, GZ, XL, and CX collected and preprocessed the
data. YZ carried out the experiments. YZ, TH, and QW produced the product.
YZ prepared the manuscript with contributions from all co-authors.</p>
  </notes><notes notes-type="competinginterests"><title>Competing interests</title>

      <p id="d1e5707">The contact author has declared that none of the authors has any competing interests.</p>
  </notes><notes notes-type="disclaimer"><title>Disclaimer</title>

      <p id="d1e5713">Publisher's note: Copernicus Publications remains neutral with regard to jurisdictional claims in published maps and institutional affiliations.</p>
  </notes><ack><title>Acknowledgements</title><p id="d1e5719">We would like to thank the GLASS team for providing
albedo, LST, and LAI products; the ECMWF project for offering the ERA5-Land
soil moisture product; the SoilGrids project for the soil property dataset;
and Yamazaki's team for the MERIT DEM. We also appreciate the scientists and
networks who have shared their valuable ground-based soil moisture datasets,
as well as the ISMN project for making these datasets readily accessible. We
are also very grateful to the editors and reviewers for their valuable
suggestions, which helped us a lot to improve the paper.</p></ack><notes notes-type="financialsupport"><title>Financial support</title>

      <p id="d1e5724">This study was supported by the Open Research Program of
the International Research Center of Big Data for Sustainable Development
Goals (grant no. CBAS2022ORP01) and the National Natural Science Foundation
of China (grant no. 42090011).</p>
  </notes><notes notes-type="reviewstatement"><title>Review statement</title>

      <p id="d1e5731">This paper was edited by Kaiguang Zhao and reviewed by three anonymous referees.</p>
  </notes><ref-list>
    <title>References</title>

      <ref id="bib1.bib1"><label>1</label><?label 1?><mixed-citation>Abbaszadeh, P., Moradkhani, H., and Zhan, X.: Downscaling SMAP radiometer
soil moisture over the CONUS using an ensemble learning method, Water
Resour. Res., 55, 324–344, <ext-link xlink:href="https://doi.org/10.1029/2018WR023354" ext-link-type="DOI">10.1029/2018WR023354</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib2"><label>2</label><?label 1?><mixed-citation>Al Bitar, A., Mialon, A., Kerr, Y. H., Cabot, F., Richaume, P., Jacquette, E., Quesney, A., Mahmoodi, A., Tarot, S., Parrens, M., Al-Yaari, A., Pellarin, T., Rodriguez-Fernandez, N., and Wigneron, J.-P.: The global SMOS Level 3 daily soil moisture and brightness temperature maps, Earth Syst. Sci. Data, 9, 293–315, <ext-link xlink:href="https://doi.org/10.5194/essd-9-293-2017" ext-link-type="DOI">10.5194/essd-9-293-2017</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib3"><label>3</label><?label 1?><mixed-citation>Al-Yaari, A., Wigneron, J.-P., Dorigo, W., Colliander, A., Pellarin, T.,
Hahn, S., Mialon, A., Richaume, P., Fernandez-Moran, R., Fan, L., Kerr, Y.
H., and De Lannoy, G.: Assessment and inter-comparison of recently
developed/reprocessed microwave satellite soil moisture products using ISMN
ground-based measurements, Remote Sens. Environ., 224, 289–303,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2019.02.008" ext-link-type="DOI">10.1016/j.rse.2019.02.008</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib4"><label>4</label><?label 1?><mixed-citation>Anderson, W. B., Zaitchik, B. F., Hain, C. R., Anderson, M. C., Yilmaz, M. T., Mecikalski, J., and Schultz, L.: Towards an integrated soil moisture drought monitor for East Africa, Hydrol. Earth Syst. Sci., 16, 2893–2913, <ext-link xlink:href="https://doi.org/10.5194/hess-16-2893-2012" ext-link-type="DOI">10.5194/hess-16-2893-2012</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bib5"><label>5</label><?label 1?><mixed-citation>Babaeian, E., Sadeghi, M., Jones, S. B., Montzka, C., Vereecken, H., and
Tuller, M.: Ground, Proximal, and Satellite Remote Sensing of Soil Moisture,
Rev. Geophys., 57, 530–616, <ext-link xlink:href="https://doi.org/10.1029/2018RG000618" ext-link-type="DOI">10.1029/2018RG000618</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib6"><label>6</label><?label 1?><mixed-citation>Balenzano, A., Mattia, F., Satalino, G., Lovergine, F. P., Palmisano, D.,
and Davidson, M. W. J.: Dataset of Sentinel-1 surface soil moisture time
series at 1 km resolution over Southern Italy, Data Br., 38, 107345,
<ext-link xlink:href="https://doi.org/10.1016/J.DIB.2021.107345" ext-link-type="DOI">10.1016/J.DIB.2021.107345</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib7"><label>7</label><?label 1?><mixed-citation>Bartalis, Z., Wagner, W., Naeimi, V., Hasenauer, S., Scipal, K., Bonekamp,
H., Figa, J., and Anderson, C.: Initial soil moisture retrievals from the
METOP-A Advanced Scatterometer (ASCAT), Geophys. Res. Lett., 34, L20401,
<ext-link xlink:href="https://doi.org/10.1029/2007GL031088" ext-link-type="DOI">10.1029/2007GL031088</ext-link>, 2007.</mixed-citation></ref>
      <ref id="bib1.bib8"><label>8</label><?label 1?><mixed-citation>Bauer-Marschallinger, B., Freeman, V., Cao, S., Paulik, C., Schaufler, S.,
Stachl, T., Modanesi, S., Massari, C., Ciabatta, L., Brocca, L., and Wagner,
W.: Toward Global Soil Moisture Monitoring With Sentinel-1: Harnessing
Assets and Overcoming Obstacles, IEEE T. Geosci. Remote, 57,
520–539, <ext-link xlink:href="https://doi.org/10.1109/TGRS.2018.2858004" ext-link-type="DOI">10.1109/TGRS.2018.2858004</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib9"><label>9</label><?label 1?><mixed-citation>Beaudoing, H. and Rodell, M.: GLDAS Noah Land Surface Model L4 3 hourly 0.25
x 0.25 degree V2.1, Goddard Earth Sciences Data and Information Services
Center [data set], <ext-link xlink:href="https://doi.org/10.5067/E7TYRXPJKWOQ" ext-link-type="DOI">10.5067/E7TYRXPJKWOQ</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib10"><label>10</label><?label 1?><mixed-citation>Belgiu, M. and Drăguţ, L.: Random forest in remote sensing: A review
of applications and future directions, ISPRS J. Photogramm., 114, 24–31,
<ext-link xlink:href="https://doi.org/10.1016/j.isprsjprs.2016.01.011" ext-link-type="DOI">10.1016/j.isprsjprs.2016.01.011</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib11"><label>11</label><?label 1?><mixed-citation>Berg, A. and Sheffield, J.: Climate change and drought: the soil moisture
perspective, Current Climate Change Reports, 4, 180–191,
<ext-link xlink:href="https://doi.org/10.1007/s40641-018-0095-0" ext-link-type="DOI">10.1007/s40641-018-0095-0</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bib12"><label>12</label><?label 1?><mixed-citation>Bindlish, R., Jackson, T., Sun, R., Cosh, M., Yueh, S., and Dinardo, S.:
Combined Passive and Active Microwave Observations of Soil Moisture During
CLASIC, IEEE Geosci. Remote S., 6, 644–648,
<ext-link xlink:href="https://doi.org/10.1109/LGRS.2009.2028441" ext-link-type="DOI">10.1109/LGRS.2009.2028441</ext-link>, 2009.</mixed-citation></ref>
      <ref id="bib1.bib13"><label>13</label><?label 1?><mixed-citation>Breiman, L.: Random forests, Mach. Learn., 45, 5–32,
<ext-link xlink:href="https://doi.org/10.1023/A:1010933404324" ext-link-type="DOI">10.1023/A:1010933404324</ext-link>, 2001.</mixed-citation></ref>
      <ref id="bib1.bib14"><label>14</label><?label 1?><mixed-citation>Brocca, L., Ciabatta, L., Massari, C., Camici, S., and Tarpanelli, A.: Soil
Moisture for Hydrological Applications: Open Questions and New
Opportunities, Water, 9, 140, <ext-link xlink:href="https://doi.org/10.3390/w9020140" ext-link-type="DOI">10.3390/w9020140</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib15"><label>15</label><?label 1?><mixed-citation>Brocca, L., Filippucci, P., Hahn, S., Ciabatta, L., Massari, C., Camici, S., Schüller, L., Bojkov, B., and Wagner, W.: SM2RAIN–ASCAT (2007–2018): global daily satellite rainfall data from ASCAT soil moisture observations, Earth Syst. Sci. Data, 11, 1583–1601, <ext-link xlink:href="https://doi.org/10.5194/essd-11-1583-2019" ext-link-type="DOI">10.5194/essd-11-1583-2019</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib16"><label>16</label><?label 1?><mixed-citation>Chan, S. K., Bindlish, R., O'Neill, P. E., Njoku, E., Jackson, T.,
Colliander, A., Chen, F., Burgin, M., Dunbar, S., Piepmeier, J., Yueh, S.,
Entekhabi, D., Cosh, M. H., Caldwell, T., Walker, J., Wu, X., Berg, A.,
Rowlandson, T., Pacheco, A., McNairn, H., Thibeault, M., Martínez, J.,
González, Á., Seyfried, M., Bosch, D., Starks, P., Goodrich, D.,
Prueger, J., Palecki, M., Small, E. E., Zreda, M., Calvet, J., Crow, W. T.,
and Kerr, Y.: Assessment of the SMAP passive soil moisture product, IEEE
T. Geosci. Remote, 54, 4994–5007,
<ext-link xlink:href="https://doi.org/10.1109/TGRS.2016.2561938" ext-link-type="DOI">10.1109/TGRS.2016.2561938</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib17"><label>17</label><?label 1?><mixed-citation>Chan, S. K., Bindlish, R., O'Neill, P., Jackson, T., Njoku, E., Dunbar, S.,
Chaubell, J., Piepmeier, J., Yueh, S., Entekhabi, D., Colliander, A., Chen,
F., Cosh, M. H., Caldwell, T., Walker, J., Berg, A., McNairn, H., Thibeault,
M., Martínez-Fernández, J., Uldall, F., Seyfried, M., Bosch, D.,
Starks, P., Holifield Collins, C., Prueger, J., van der Velde, R., Asanuma,
J., Palecki, M., Small, E. E., Zreda, M., Calvet, J., Crow, W. T., and Kerr,
Y.: Development and assessment of the SMAP enhanced passive soil moisture
product, Remote Sens. Environ., 204, 931–941,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2017.08.025" ext-link-type="DOI">10.1016/j.rse.2017.08.025</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bib18"><label>18</label><?label 1?><mixed-citation>Chen, T. and Guestrin, C.: XGBoost: A Scalable Tree Boosting System, in:
Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge
Discovery and Data Mining, 785–794,
<ext-link xlink:href="https://doi.org/10.1145/2939672.2939785" ext-link-type="DOI">10.1145/2939672.2939785</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib19"><label>19</label><?label 1?><mixed-citation>Colliander, A., Jackson, T. J., Bindlish, R., Chan, S., Das, N., Kim, S. B.,
Cosh, M. H., Dunbar, R. S., Dang, L., Pashaian, L., Asanuma, J., Aida, K.,
Berg, A., Rowlandson, T., Bosch, D., Caldwell, T., Caylor, K., Goodrich, D.,
al Jassar, H., Lopez-Baeza, E., Martínez-Fernández, J.,
González-Zamora, A., Livingston, S., McNairn, H., Pacheco, A.,
Moghaddam, M., Montzka, C., Notarnicola, C., Niedrist, G., Pellarin, T.,
Prueger, J., Pulliainen, J., Rautiainen, K., Ramos, J., Seyfried, M.,
Starks, P., Su, Z., Zeng, Y., van der Velde, R., Thibeault, M., Dorigo, W.,
Vreugdenhil, M., Walker, J. P., Wu, X., Monerris, A., O'Neill, P. E.,
Entekhabi, D., Njoku, E. G., and Yueh, S.: Validation of SMAP surface soil
moisture products with core validation sites, Remote Sens. Environ., 191,
215–231, <ext-link xlink:href="https://doi.org/10.1016/j.rse.2017.01.021" ext-link-type="DOI">10.1016/j.rse.2017.01.021</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib20"><label>20</label><?label 1?><mixed-citation>Crow, W. T., Berg, A. A., Cosh, M. H., Loew, A., Mohanty, B. P., Panciera,
R., De Rosnay, P., Ryu, D., and Walker, J. P.: Upscaling sparse ground-based
soil moisture observations for the validation of coarse-resolution satellite
soil moisture products, Rev. Geophys., 50, 1–20,
<ext-link xlink:href="https://doi.org/10.1029/2011RG000372" ext-link-type="DOI">10.1029/2011RG000372</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bib21"><label>21</label><?label 1?><mixed-citation>Crow, W. T., Chen, F., and Colliander, A.: Benchmarking downscaled
satellite-based soil moisture products using sparse, point-scale ground
observations, Remote Sens. Environ., 283, 113300,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2022.113300" ext-link-type="DOI">10.1016/j.rse.2022.113300</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bib22"><label>22</label><?label 1?><mixed-citation>Cui, D., Liang, S., Wang, D., and Liu, Z.: A 1 km global dataset of historical (1979–2013) and future (2020–2100) Köppen–Geiger climate classification and bioclimatic variables, Earth Syst. Sci. Data, 13, 5087–5114, <ext-link xlink:href="https://doi.org/10.5194/essd-13-5087-2021" ext-link-type="DOI">10.5194/essd-13-5087-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib23"><label>23</label><?label 1?><mixed-citation>Dai, Y., Shangguan, W., Wei, N., Xin, Q., Yuan, H., Zhang, S., Liu, S., Lu, X., Wang, D., and Yan, F.: A review of the global soil property maps for Earth system models, SOIL, 5, 137–158, <ext-link xlink:href="https://doi.org/10.5194/soil-5-137-2019" ext-link-type="DOI">10.5194/soil-5-137-2019</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib24"><label>24</label><?label 1?><mixed-citation>Das, N. N., Entekhabi, D., Dunbar, R. S., Chaubell, M. J., Colliander, A.,
Yueh, S., Jagdhuber, T., Chen, F., Crow, W., O'Neill, P. E., Walker, J. P.,
Berg, A., Bosch, D. D., Caldwell, T., Cosh, M. H., Collins, C. H.,
Lopez-Baeza, E., and Thibeault, M.: The SMAP and Copernicus Sentinel 1A/B
microwave active-passive high resolution surface soil moisture product,
Remote Sens. Environ., 233, 111380,
<ext-link xlink:href="https://doi.org/10.1016/J.RSE.2019.111380" ext-link-type="DOI">10.1016/J.RSE.2019.111380</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib25"><label>25</label><?label 1?><mixed-citation>Dorigo, W., Wagner, W., Albergel, C., Albrecht, F., Balsamo, G., Brocca, L.,
Chung, D., Ertl, M., Forkel, M., Gruber, A., Haas, E., Hamer, P. D.,
Hirschi, M., Ikonen, J., de Jeu, R., Kidd, R., Lahoz, W., Liu, Y. Y.,
Miralles, D., Mistelbauer, T., Nicolai-Shaw, N., Parinussa, R., Pratola, C.,
Reimer, C., van der Schalie, R., Seneviratne, S. I., Smolander, T., and
Lecomte, P.: ESA CCI Soil Moisture for improved Earth system understanding:
State-of-the art and future directions, Remote Sens. Environ., 203,
185–215, <ext-link xlink:href="https://doi.org/10.1016/J.RSE.2017.07.001" ext-link-type="DOI">10.1016/J.RSE.2017.07.001</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib26"><label>26</label><?label 1?><mixed-citation>Dorigo, W., Himmelbauer, I., Aberer, D., Schremmer, L., Petrakovic, I., Zappa, L., Preimesberger, W., Xaver, A., Annor, F., Ardö, J., Baldocchi, D., Bitelli, M., Blöschl, G., Bogena, H., Brocca, L., Calvet, J.-C., Camarero, J. J., Capello, G., Choi, M., Cosh, M. C., van de Giesen, N., Hajdu, I., Ikonen, J., Jensen, K. H., Kanniah, K. D., de Kat, I., Kirchengast, G., Kumar Rai, P., Kyrouac, J., Larson, K., Liu, S., Loew, A., Moghaddam, M., Martínez Fernández, J., Mattar Bader, C., Morbidelli, R., Musial, J. P., Osenga, E., Palecki, M. A., Pellarin, T., Petropoulos, G. P., Pfeil, I., Powers, J., Robock, A., Rüdiger, C., Rummel, U., Strobel, M., Su, Z., Sullivan, R., Tagesson, T., Varlagin, A., Vreugdenhil, M., Walker, J., Wen, J., Wenger, F., Wigneron, J. P., Woods, M., Yang, K., Zeng, Y., Zhang, X., Zreda, M., Dietrich, S., Gruber, A., van Oevelen, P., Wagner, W., Scipal, K., Drusch, M., and Sabia, R.: The International Soil Moisture Network: serving Earth system science for over a decade, Hydrol. Earth Syst. Sci., 25, 5749–5804, <ext-link xlink:href="https://doi.org/10.5194/hess-25-5749-2021" ext-link-type="DOI">10.5194/hess-25-5749-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib27"><label>27</label><?label 1?><mixed-citation>Dorigo, W. A., Xaver, A., Vreugdenhil, M., Gruber, A., Hegyiová, A.,
Sanchis-Dufau, A. D., Zamojski, D., Cordes, C., Wagner, W., and Drusch, M.:
Global Automated Quality Control of In Situ Soil Moisture Data from the
International Soil Moisture Network, Vadose Zone J., 12, vzj2012.0097,
<ext-link xlink:href="https://doi.org/10.2136/vzj2012.0097" ext-link-type="DOI">10.2136/vzj2012.0097</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bib28"><label>28</label><?label 1?><mixed-citation>Entekhabi, D., Reichle, R. H., Koster, R. D., and Crow, W. T.: Performance
metrics for soil moisture retrievals and application requirements, J.
Hydrometeorol., 11, 832–840, <ext-link xlink:href="https://doi.org/10.1175/2010JHM1223.1" ext-link-type="DOI">10.1175/2010JHM1223.1</ext-link>, 2010.</mixed-citation></ref>
      <?pagebreak page2077?><ref id="bib1.bib29"><label>29</label><?label 1?><mixed-citation>Friedl, M. and Sulla-Menashe, D.: MCD12Q1 MODIS/Terra<inline-formula><mml:math id="M279" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula>Aqua Land Cover Type
Yearly L3 Global 500m SIN Grid V006, NASA EOSDIS Land Processes DAAC [data
set], <ext-link xlink:href="https://doi.org/10.5067/MODIS/MCD12Q1.006" ext-link-type="DOI">10.5067/MODIS/MCD12Q1.006</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib30"><label>30</label><?label 1?><mixed-citation>Friedman, J. H.: Greedy function approximation: a gradient boosting machine,
Ann. Stat., 29, 1189–1232, <ext-link xlink:href="https://doi.org/10.1214/aos/1013203451" ext-link-type="DOI">10.1214/aos/1013203451</ext-link>, 2001.</mixed-citation></ref>
      <ref id="bib1.bib31"><label>31</label><?label 1?><mixed-citation>Ghulam, A., Qin, Q., Teyip, T., and Li, Z.-L.: Modified perpendicular
drought index (MPDI): a real-time drought monitoring method, ISPRS J.
Photogramm., 62, 150–164,
<ext-link xlink:href="https://doi.org/10.1016/j.isprsjprs.2007.03.002" ext-link-type="DOI">10.1016/j.isprsjprs.2007.03.002</ext-link>, 2007.</mixed-citation></ref>
      <ref id="bib1.bib32"><label>32</label><?label 1?><mixed-citation>Gislason, P. O., Benediktsson, J. A., and Sveinsson, J. R.: Random Forests
for land cover classification, Pattern Recognit. Lett., 27, 294–300,
<ext-link xlink:href="https://doi.org/10.1016/j.patrec.2005.08.011" ext-link-type="DOI">10.1016/j.patrec.2005.08.011</ext-link>, 2006.</mixed-citation></ref>
      <ref id="bib1.bib33"><label>33</label><?label 1?><mixed-citation>Gruber, A., Dorigo, W. A., Zwieback, S., Xaver, A., and Wagner, W.:
Characterizing Coarse-Scale Representativeness of in situ Soil Moisture
Measurements from the International Soil Moisture Network, Vadose Zone J.,
12, vzj2012.0170, <ext-link xlink:href="https://doi.org/10.2136/vzj2012.0170" ext-link-type="DOI">10.2136/vzj2012.0170</ext-link>,
2013.</mixed-citation></ref>
      <ref id="bib1.bib34"><label>34</label><?label 1?><mixed-citation>Gruber, A., Su, C.-H., Zwieback, S., Crow, W., Dorigo, W., and Wagner, W.:
Recent advances in (soil moisture) triple collocation analysis, Int. J.
Appl. Earth Obs., 45, 200–211,
<ext-link xlink:href="https://doi.org/10.1016/j.jag.2015.09.002" ext-link-type="DOI">10.1016/j.jag.2015.09.002</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib35"><label>35</label><?label 1?><mixed-citation>Gruber, A., Scanlon, T., van der Schalie, R., Wagner, W., and Dorigo, W.: Evolution of the ESA CCI Soil Moisture climate data records and their underlying merging methodology, Earth Syst. Sci. Data, 11, 717–739, <ext-link xlink:href="https://doi.org/10.5194/essd-11-717-2019" ext-link-type="DOI">10.5194/essd-11-717-2019</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib36"><label>36</label><?label 1?><mixed-citation>Gruber, A., De Lannoy, G., Albergel, C., Al-Yaari, A., Brocca, L., Calvet,
J.-C., Colliander, A., Cosh, M., Crow, W., Dorigo, W., Draper, C., Hirschi,
M., Kerr, Y., Konings, A., Lahoz, W., McColl, K., Montzka, C.,
Muñoz-Sabater, J., Peng, J., Reichle, R., Richaume, P., Rüdiger, C.,
Scanlon, T., van der Schalie, R., Wigneron, J.-P., and Wagner, W.:
Validation practices for satellite soil moisture retrievals: What are (the)
errors?, Remote Sens. Environ., 244, 111806,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2020.111806" ext-link-type="DOI">10.1016/j.rse.2020.111806</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib37"><label>37</label><?label 1?><mixed-citation>Hersbach, H., Bell, B., Berrisford, P., Hirahara, S., Horányi, A.,
Muñoz-Sabater, J., Nicolas, J., Peubey, C., Radu, R., Schepers, D.,
Simmons, A., Soci, C., Abdalla, S., Abellan, X., Balsamo, G., Bechtold, P.,
Biavati, G., Bidlot, J., Bonavita, M., De Chiara, G., Dahlgren, P., Dee, D.,
Diamantakis, M., Dragani, R., Flemming, J., Forbes, R., Fuentes, M., Geer,
A., Haimberger, L., Healy, S., Hogan, R. J., Hólm, E., Janisková,
M., Keeley, S., Laloyaux, P., Lopez, P., Lupu, C., Radnoti, G., de Rosnay,
P., Rozum, I., Vamborg, F., Villaume, S., and Thépaut, J.-N.: The ERA5
global reanalysis, Q. J. Roy. Meteor. Soc., 146, 1999–2049,
<ext-link xlink:href="https://doi.org/10.1002/qj.3803" ext-link-type="DOI">10.1002/qj.3803</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib38"><label>38</label><?label 1?><mixed-citation>Holzman, M. E., Rivas, R., and Piccolo, M. C.: Estimating soil moisture and
the relationship with crop yield using surface temperature and vegetation
index, Int. J. Appl. Earth Obs., 28, 181–192,
<ext-link xlink:href="https://doi.org/10.1016/j.jag.2013.12.006" ext-link-type="DOI">10.1016/j.jag.2013.12.006</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bib39"><label>39</label><?label 1?><mixed-citation>Humphrey, V., Berg, A., Ciais, P., Gentine, P., Jung, M., Reichstein, M.,
Seneviratne, S. I., and Frankenberg, C.: Soil moisture–atmosphere feedback
dominates land carbon uptake variability, Nature, 592, 65–69,
<ext-link xlink:href="https://doi.org/10.1038/s41586-021-03325-5" ext-link-type="DOI">10.1038/s41586-021-03325-5</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib40"><label>40</label><?label 1?><mixed-citation>Karthikeyan, L. and Mishra, A. K.: Multi-layer high-resolution soil moisture
estimation using machine learning over the United States, Remote Sens.
Environ., 266, 112706, <ext-link xlink:href="https://doi.org/10.1016/J.RSE.2021.112706" ext-link-type="DOI">10.1016/J.RSE.2021.112706</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib41"><label>41</label><?label 1?><mixed-citation>Kerr, Y. H., Al-Yaari, A., Rodriguez-Fernandez, N., Parrens, M., Molero, B.,
Leroux, D., Bircher, S., Mahmoodi, A., Mialon, A., Richaume, P., Delwart,
S., Al Bitar, A., Pellarin, T., Bindlish, R., Jackson, T. J., Rüdiger,
C., Waldteufel, P., Mecklenburg, S., and Wigneron, J. P.: Overview of SMOS
performance in terms of global soil moisture monitoring after six years in
operation, Remote Sens. Environ., 180, 40–63,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2016.02.042" ext-link-type="DOI">10.1016/j.rse.2016.02.042</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bib42"><label>42</label><?label 1?><mixed-citation>Kim, S., Zhang, R., Pham, H., and Sharma, A.: A Review of Satellite-Derived
Soil Moisture and Its Usage for Flood Estimation, Remote Sens. Earth Syst.
Sci., 2, 225–246, <ext-link xlink:href="https://doi.org/10.1007/s41976-019-00025-7" ext-link-type="DOI">10.1007/s41976-019-00025-7</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib43"><label>43</label><?label 1?><mixed-citation>Li, B., Liang, S., Liu, X., Ma, H., Chen, Y., Liang, T., and He, T.:
Estimation of all-sky 1 km land surface temperature over the conterminous
United States, Remote Sens. Environ., 266, 112707,
<ext-link xlink:href="https://doi.org/10.1016/J.RSE.2021.112707" ext-link-type="DOI">10.1016/J.RSE.2021.112707</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib44"><label>44</label><?label 1?><mixed-citation>Li, X., Wigneron, J.-P., Fan, L., Frappart, F., Yueh, S. H., Colliander, A.,
Ebtehaj, A., Gao, L., Fernandez-Moran, R., Liu, X., Wang, M., Ma, H., Moisy,
C., and Ciais, P.: A new SMAP soil moisture and vegetation optical depth
product (SMAP-IB): Algorithm, assessment and inter-comparison, Remote Sens.
Environ., 271, 112921,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2022.112921" ext-link-type="DOI">10.1016/j.rse.2022.112921</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bib45"><label>45</label><?label 1?><mixed-citation>Liang, S. and Wang, J. (Eds.): Chapter 18 – Soil moisture contents, in:
Advanced Remote Sensing, 2nd Edn., Academic Press, 685–711,
<ext-link xlink:href="https://doi.org/10.1016/B978-0-12-815826-5.00018-0" ext-link-type="DOI">10.1016/B978-0-12-815826-5.00018-0</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib46"><label>46</label><?label 1?><mixed-citation>Liang, S., Cheng, J., Jia, K., Jiang, B., Liu, Q., Xiao, Z., Yao, Y., Yuan,
W., Zhang, X., Zhao, X., and Zhou, J.: The Global Land Surface Satellite
(GLASS) Product Suite, B. Am. Meteorol. Soc., 102, E323–E337,
<ext-link xlink:href="https://doi.org/10.1175/BAMS-D-18-0341.1" ext-link-type="DOI">10.1175/BAMS-D-18-0341.1</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib47"><label>47</label><?label 1?><mixed-citation>Liu, L., Gudmundsson, L., Hauser, M., Qin, D., Li, S., and Seneviratne, S.
I.: Soil moisture dominates dryness stress on ecosystem production globally,
Nat. Commun., 11, 4892, <ext-link xlink:href="https://doi.org/10.1038/s41467-020-18631-1" ext-link-type="DOI">10.1038/s41467-020-18631-1</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib48"><label>48</label><?label 1?><mixed-citation>Liu, N. F., Liu, Q., Wang, L. Z., Liang, S. L., Wen, J. G., Qu, Y., and Liu, S. H.: A statistics-based temporal filter algorithm to map spatiotemporally continuous shortwave albedo from MODIS data, Hydrol. Earth Syst. Sci., 17, 2121–2129, <ext-link xlink:href="https://doi.org/10.5194/hess-17-2121-2013" ext-link-type="DOI">10.5194/hess-17-2121-2013</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bib49"><label>49</label><?label 1?><mixed-citation>Long, D., Bai, L., Yan, L., Zhang, C., Yang, W., Lei, H., Quan, J., Meng,
X., and Shi, C.: Generation of spatially complete and daily continuous
surface soil moisture of high spatial resolution, Remote Sens. Environ.,
233, 111364, <ext-link xlink:href="https://doi.org/10.1016/j.rse.2019.111364" ext-link-type="DOI">10.1016/j.rse.2019.111364</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib50"><label>50</label><?label 1?><mixed-citation>Luo, P., Song, Y., Huang, X., Ma, H., Liu, J., Yao, Y., and Meng, L.:
Identifying determinants of spatio-temporal disparities in soil moisture of
the Northern Hemisphere using a geographically optimal zones-based
heterogeneity model, ISPRS J. Photogramm., 185, 111–128,
<ext-link xlink:href="https://doi.org/10.1016/j.isprsjprs.2022.01.009" ext-link-type="DOI">10.1016/j.isprsjprs.2022.01.009</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bib51"><label>51</label><?label 1?><mixed-citation>Ma, H. and Liang, S.: Development of the GLASS 250-m leaf area index product
(version 6) from MODIS data using the bidirectional LSTM deep learning
model, Remote Sens. Environ., 273, 112985,
<ext-link xlink:href="https://doi.org/10.1016/J.RSE.2022.112985" ext-link-type="DOI">10.1016/J.RSE.2022.112985</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bib52"><label>52</label><?label 1?><mixed-citation>Ma, H., Zeng, J., Zhang, X., Fu, P., Zheng, D., Wigneron, J.-P., Chen, N.,
and Niyogi, D.: Evaluation of six satellite- and model-based surface soil
temperature datasets using global<?pagebreak page2078?> ground-based observations, Remote Sens.
Environ., 264, 112605,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2021.112605" ext-link-type="DOI">10.1016/j.rse.2021.112605</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib53"><label>53</label><?label 1?><mixed-citation>McColl, K. A., Vogelzang, J., Konings, A. G., Entekhabi, D., Piles, M., and
Stoffelen, A.: Extended triple collocation: Estimating errors and
correlation coefficients with respect to an unknown target, Geophys. Res.
Lett., 41, 6229–6236, <ext-link xlink:href="https://doi.org/10.1002/2014GL061322" ext-link-type="DOI">10.1002/2014GL061322</ext-link>,
2014.</mixed-citation></ref>
      <ref id="bib1.bib54"><label>54</label><?label 1?><mixed-citation>Molero, B., Leroux, D. J., Richaume, P., Kerr, Y. H., Merlin, O., Cosh, M.
H., and Bindlish, R.: Multi-Timescale Analysis of the Spatial
Representativeness of In Situ Soil Moisture Data within Satellite
Footprints, J. Geophys. Res.-Atmos., 123, 3–21,
<ext-link xlink:href="https://doi.org/10.1002/2017JD027478" ext-link-type="DOI">10.1002/2017JD027478</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bib55"><label>55</label><?label 1?><mixed-citation>Muñoz-Sabater, J.: ERA5-Land hourly data from 1981 to present,
Copernicus Climate Change Service (C3S) Climate Data Store (CDS) [data set],
<ext-link xlink:href="https://doi.org/10.24381/cds.e2161bac" ext-link-type="DOI">10.24381/cds.e2161bac</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib56"><label>56</label><?label 1?><mixed-citation>Muñoz-Sabater, J.: ERA5-Land hourly data from 1950 to 1980, Copernicus
Climate Change Service (C3S) Climate Data Store (CDS) [data set],
<ext-link xlink:href="https://doi.org/10.24381/cds.e2161bac" ext-link-type="DOI">10.24381/cds.e2161bac</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib57"><label>57</label><?label 1?><mixed-citation>Muñoz-Sabater, J., Dutra, E., Agustí-Panareda, A., Albergel, C., Arduini, G., Balsamo, G., Boussetta, S., Choulga, M., Harrigan, S., Hersbach, H., Martens, B., Miralles, D. G., Piles, M., Rodríguez-Fernández, N. J., Zsoter, E., Buontempo, C., and Thépaut, J.-N.: ERA5-Land: a state-of-the-art global reanalysis dataset for land applications, Earth Syst. Sci. Data, 13, 4349–4383, <ext-link xlink:href="https://doi.org/10.5194/essd-13-4349-2021" ext-link-type="DOI">10.5194/essd-13-4349-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib58"><label>58</label><?label 1?><mixed-citation>Naz, B. S., Kollet, S., Franssen, H.-J. H., Montzka, C., and Kurtz, W.: A 3
km spatially and temporally consistent European daily soil moisture
reanalysis from 2000 to 2015, Sci. Data, 7, 111,
<ext-link xlink:href="https://doi.org/10.1038/s41597-020-0450-6" ext-link-type="DOI">10.1038/s41597-020-0450-6</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib59"><label>59</label><?label 1?><mixed-citation>Njoku, E. G., Jackson, T. J., Lakshmi, V., Chan, T. K., and Nghiem, S. V.:
Soil moisture retrieval from AMSR-E, IEEE T. Geosci. Remote, 41,
215–229, <ext-link xlink:href="https://doi.org/10.1109/TGRS.2002.808243" ext-link-type="DOI">10.1109/TGRS.2002.808243</ext-link>, 2003.</mixed-citation></ref>
      <ref id="bib1.bib60"><label>60</label><?label 1?><mixed-citation>O'Neill, P. E., Chan, S., Njoku, E. G., Jackson, T., Bindlish, R., and
Chaubell, J.: SMAP L3 Radiometer Global Daily 36 km EASE-Grid Soil Moisture,
Version 8, NASA National Snow and Ice Data Center Distributed Active Archive
Center [data set], <ext-link xlink:href="https://doi.org/10.5067/OMHVSRGFX38O" ext-link-type="DOI">10.5067/OMHVSRGFX38O</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib61"><label>61</label><?label 1?><mixed-citation>Owe, M., de Jeu, R., and Holmes, T.: Multisensor historical climatology of
satellite-derived global land surface moisture, J. Geophys. Res.-Earth, 113, F01002, <ext-link xlink:href="https://doi.org/10.1029/2007JF000769" ext-link-type="DOI">10.1029/2007JF000769</ext-link>, 2008.</mixed-citation></ref>
      <ref id="bib1.bib62"><label>62</label><?label 1?><mixed-citation>Peng, J., Loew, A., Merlin, O., and Verhoest, N. E. C.: A review of spatial
downscaling of satellite remotely sensed soil moisture, Rev. Geophys., 55,
341–366, <ext-link xlink:href="https://doi.org/10.1002/2016RG000543" ext-link-type="DOI">10.1002/2016RG000543</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib63"><label>63</label><?label 1?><mixed-citation>Peng, J., Albergel, C., Balenzano, A., Brocca, L., Cartus, O., Cosh, M. H.,
Crow, W. T., Dabrowska-Zielinska, K., Dadson, S., Davidson, M. W. J., de
Rosnay, P., Dorigo, W., Gruber, A., Hagemann, S., Hirschi, M., Kerr, Y. H.,
Lovergine, F., Mahecha, M. D., Marzahn, P., Mattia, F., Musial, J. P.,
Preuschmann, S., Reichle, R. H., Satalino, G., Silgram, M., van Bodegom, P.
M., Verhoest, N. E. C., Wagner, W., Walker, J. P., Wegmüller, U., and
Loew, A.: A roadmap for high-resolution satellite soil moisture applications
– confronting product characteristics with user requirements, Remote Sens.
Environ., 252, 112162, <ext-link xlink:href="https://doi.org/10.1016/J.RSE.2020.112162" ext-link-type="DOI">10.1016/J.RSE.2020.112162</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib64"><label>64</label><?label 1?><mixed-citation>Poggio, L., de Sousa, L. M., Batjes, N. H., Heuvelink, G. B. M., Kempen, B., Ribeiro, E., and Rossiter, D.: SoilGrids 2.0: producing soil information for the globe with quantified spatial uncertainty, SOIL, 7, 217–240, <ext-link xlink:href="https://doi.org/10.5194/soil-7-217-2021" ext-link-type="DOI">10.5194/soil-7-217-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib65"><label>65</label><?label 1?><mixed-citation>Qu, Y., Liu, Q., Liang, S., Wang, L., Liu, N., and Liu, S.:
Direct-Estimation Algorithm for Mapping Daily Land-Surface Broadband Albedo
From MODIS Data, IEEE T. Geosci. Remote, 52, 907–919,
<ext-link xlink:href="https://doi.org/10.1109/TGRS.2013.2245670" ext-link-type="DOI">10.1109/TGRS.2013.2245670</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bib66"><label>66</label><?label 1?><mixed-citation>Rahimzadeh-Bajgiran, P., Berg, A. A., Champagne, C., and Omasa, K.:
Estimation of soil moisture using optical/thermal infrared remote sensing in
the Canadian Prairies, ISPRS J. Photogramm., 83, 94–103,
<ext-link xlink:href="https://doi.org/10.1016/j.isprsjprs.2013.06.004" ext-link-type="DOI">10.1016/j.isprsjprs.2013.06.004</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bib67"><label>67</label><?label 1?><mixed-citation>Rodell, M., Houser, P. R., Jambor, U., Gottschalck, J., Mitchell, K., Meng,
C. J., Arsenault, K., Cosgrove, B., Radakovich, J., Bosilovich, M., Entin,
J. K., Walker, J. P., Lohmann, D., and Toll, D.: The global land data
assimilation system, B. Am. Meteorol. Soc., 85, 381–394,
<ext-link xlink:href="https://doi.org/10.1175/BAMS-85-3-381" ext-link-type="DOI">10.1175/BAMS-85-3-381</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bib68"><label>68</label><?label 1?><mixed-citation>Senyurek, V., Lei, F., Boyd, D., Kurum, M., Gurbuz, A. C., and Moorhead, R.:
Machine Learning-Based CYGNSS Soil Moisture Estimates over ISMN sites in
CONUS, Remote Sens., 12, 1168, <ext-link xlink:href="https://doi.org/10.3390/rs12071168" ext-link-type="DOI">10.3390/rs12071168</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bib69"><label>69</label><?label 1?><mixed-citation>Sheffield, J., Goteti, G., Wen, F., and Wood, E. F.: A simulated soil
moisture based drought analysis for the United States, J. Geophys. Res.,
109, 1–19, <ext-link xlink:href="https://doi.org/10.1029/2004JD005182" ext-link-type="DOI">10.1029/2004JD005182</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bib70"><label>70</label><?label 1?><mixed-citation>Shi, J., Zhao, T., Cui, Q., and Yao, P.: Airborne and Spaceborne Passive
Microwave Measurements of Soil Moisture, in: Observation and Measurement of
Ecohydrological Processes, edited by: Li, X. and Vereecken, H., Springer
Berlin Heidelberg, Berlin, Heidelberg, 71–105,
<ext-link xlink:href="https://doi.org/10.1007/978-3-662-48297-1_3" ext-link-type="DOI">10.1007/978-3-662-48297-1_3</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib71"><label>71</label><?label 1?><mixed-citation>Smith, A. B., Walker, J. P., Western, A. W., Young, R. I., Ellett, K. M.,
Pipunic, R. C., Grayson, R. B., Siriwardena, L., Chiew, F. H. S., and
Richter, H.: The Murrumbidgee soil moisture monitoring network data set,
Water Resour. Res., 48, W07701,
<ext-link xlink:href="https://doi.org/10.1029/2012WR011976" ext-link-type="DOI">10.1029/2012WR011976</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bib72"><label>72</label><?label 1?><mixed-citation>Song, J.: Bias corrections for Random Forest in regression using residual
rotation, J. Korean Stat. Soc., 44, 321–326,
<ext-link xlink:href="https://doi.org/10.1016/j.jkss.2015.01.003" ext-link-type="DOI">10.1016/j.jkss.2015.01.003</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bib73"><label>73</label><?label 1?><mixed-citation>Song, P., Zhang, Y., Guo, J., Shi, J., Zhao, T., and Tong, B.: A 1 km daily surface soil moisture dataset of enhanced coverage under all-weather conditions over China in 2003–2019, Earth Syst. Sci. Data, 14, 2613–2637, <ext-link xlink:href="https://doi.org/10.5194/essd-14-2613-2022" ext-link-type="DOI">10.5194/essd-14-2613-2022</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bib74"><label>74</label><?label 1?><mixed-citation>Starks, P. J., Fiebrich, C. A., Grimsley, D. L., Garbrecht, J. D., Steiner,
J. L., Guzman, J. A., and Moriasi, D. N.: Upper Washita River Experimental
Watersheds: Meteorologic and Soil Climate Measurement Networks, J. Environ.
Qual., 43, 1239–1249,
<ext-link xlink:href="https://doi.org/10.2134/jeq2013.08.0312" ext-link-type="DOI">10.2134/jeq2013.08.0312</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bib75"><label>75</label><?label 1?><mixed-citation>Stoffelen, A.: Toward the true near-surface wind speed: Error modeling and
calibration using triple collocation, J. Geophys. Res.-Oceans, 103,
7755–7766, <ext-link xlink:href="https://doi.org/10.1029/97JC03180" ext-link-type="DOI">10.1029/97JC03180</ext-link>, 1998.</mixed-citation></ref>
      <ref id="bib1.bib76"><label>76</label><?label 1?><mixed-citation>Vergopolan, N., Chaney, N. W., Pan, M., Sheffield, J., Beck, H. E.,
Ferguson, C. R., Torres-Rojas, L., Sadri, S., and Wood, E. F.:
SMAP-HydroBlocks, a 30-m satellite-based soil moisture dataset for the
conterminous US, Sci. Data, 8, 264,
<ext-link xlink:href="https://doi.org/10.1038/s41597-021-01050-2" ext-link-type="DOI">10.1038/s41597-021-01050-2</ext-link>, 2021.</mixed-citation></ref>
      <?pagebreak page2079?><ref id="bib1.bib77"><label>77</label><?label 1?><mixed-citation>Wagner, W., Pathe, C., Doubkova, M., Sabel, D., Bartsch, A., Hasenauer, S.,
Blöschl, G., Scipal, K., Martínez-Fernández, J., and Löw,
A.: Temporal Stability of Soil Moisture and Radar Backscatter Observed by
the Advanced Synthetic Aperture Radar (ASAR), Sensors, 8, 1174–1197,
<ext-link xlink:href="https://doi.org/10.3390/s80201174" ext-link-type="DOI">10.3390/s80201174</ext-link>, 2008.</mixed-citation></ref>
      <ref id="bib1.bib78"><label>78</label><?label 1?><mixed-citation>Wagner, W., Hahn, S., Kidd, R., Melzer, T., Bartalis, Z., Hasenauer, S.,
Figa-Saldaña, J., de Rosnay, P., Jann, A., Schneider, S., Komma, J.,
Kubu, G., Brugger, K., Aubrecht, C., Züger, J., Gangkofner, U.,
Kienberger, S., Brocca, L., Wang, Y., Blöschl, G., Eitzinger, J., and
Steinnocher, K.: The ASCAT Soil Moisture Product: A Review of its
Specifications, Validation Results, and Emerging Applications, Meteorol.
Z., 22, 5–33, <ext-link xlink:href="https://doi.org/10.1127/0941-2948/2013/0399" ext-link-type="DOI">10.1127/0941-2948/2013/0399</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bib79"><label>79</label><?label 1?><mixed-citation>Wang, Y., Leng, P., Peng, J., Marzahn, P., and Ludwig, R.: Global
assessments of two blended microwave soil moisture products CCI and SMOPS
with in-situ measurements and reanalysis data, Int. J. Appl. Earth Obs., 94, 102234, <ext-link xlink:href="https://doi.org/10.1016/J.JAG.2020.102234" ext-link-type="DOI">10.1016/J.JAG.2020.102234</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib80"><label>80</label><?label 1?><mixed-citation>Wigneron, J.-P., Li, X., Frappart, F., Fan, L., Al-Yaari, A., De Lannoy, G.,
Liu, X., Wang, M., Le Masson, E., and Moisy, C.: SMOS-IC data record of soil
moisture and L-VOD: Historical development, applications and perspectives,
Remote Sens. Environ., 254, 112238,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2020.112238" ext-link-type="DOI">10.1016/j.rse.2020.112238</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib81"><label>81</label><?label 1?><mixed-citation>Xu, X.: Evaluation of SMAP Level 2, 3, and 4 Soil Moisture Datasets over the
Great Lakes Region, Remote Sens., 12, 3785, <ext-link xlink:href="https://doi.org/10.3390/rs12223785" ext-link-type="DOI">10.3390/rs12223785</ext-link>,
2020.</mixed-citation></ref>
      <ref id="bib1.bib82"><label>82</label><?label 1?><mixed-citation>Yamazaki, D., Ikeshima, D., Tawatari, R., Yamaguchi, T., O'Loughlin, F.,
Neal, J. C., Sampson, C. C., Kanae, S., and Bates, P. D.: A high-accuracy
map of global terrain elevations, Geophys. Res. Lett., 44, 5844–5853,
<ext-link xlink:href="https://doi.org/10.1002/2017GL072874" ext-link-type="DOI">10.1002/2017GL072874</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bib83"><label>83</label><?label 1?><mixed-citation>Yang, J., Zhang, P., Lu, N., Yang, Z., Shi, J., and Dong, C.: Improvements
on global meteorological observations from the current Fengyun 3 satellites
and beyond, Int. J. Digit. Earth, 5, 251–265,
<ext-link xlink:href="https://doi.org/10.1080/17538947.2012.658666" ext-link-type="DOI">10.1080/17538947.2012.658666</ext-link>, 2012.</mixed-citation></ref>
      <ref id="bib1.bib84"><label>84</label><?label 1?><mixed-citation>Yee, M. S., Walker, J. P., Rüdiger, C., Parinussa, R. M., Koike, T., and
Kerr, Y. H.: A comparison of SMOS and AMSR2 soil moisture using
representative sites of the OzNet monitoring network, Remote Sens. Environ.,
195, 297–312, <ext-link xlink:href="https://doi.org/10.1016/j.rse.2017.04.019" ext-link-type="DOI">10.1016/j.rse.2017.04.019</ext-link>, 2017.
</mixed-citation></ref><?xmltex \hack{\newpage}?>
      <ref id="bib1.bib85"><label>85</label><?label 1?><mixed-citation>Yuan, Q., Xu, H., Li, T., Shen, H., and Zhang, L.: Estimating surface soil
moisture from satellite observations using a generalized regression neural
network trained on sparse ground-based measurements in the continental U.S,
J. Hydrol., 580, 124351, <ext-link xlink:href="https://doi.org/10.1016/j.jhydrol.2019.124351" ext-link-type="DOI">10.1016/j.jhydrol.2019.124351</ext-link>,
2020.</mixed-citation></ref>
      <ref id="bib1.bib86"><label>86</label><?label 1?><mixed-citation>Yue, J., Tian, J., Tian, Q., Xu, K., and Xu, N.: Development of soil
moisture indices from differences in water absorption between
shortwave-infrared bands, ISPRS J. Photogramm., 154, 216–230,
<ext-link xlink:href="https://doi.org/10.1016/j.isprsjprs.2019.06.012" ext-link-type="DOI">10.1016/j.isprsjprs.2019.06.012</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bib87"><label>87</label><?label 1?><mixed-citation>Zappa, L., Forkel, M., Xaver, A., and Dorigo, W.: Deriving Field Scale Soil
Moisture from Satellite Observations and Ground Measurements in a Hilly
Agricultural Region, Remote Sens., 11, 2596, <ext-link xlink:href="https://doi.org/10.3390/rs11222596" ext-link-type="DOI">10.3390/rs11222596</ext-link>,
2019.</mixed-citation></ref>
      <ref id="bib1.bib88"><label>88</label><?label 1?><mixed-citation>Zhang, Q., Yuan, Q., Li, J., Wang, Y., Sun, F., and Zhang, L.: Generating seamless global daily AMSR2 soil moisture (SGD-SM) long-term products for the years 2013–2019, Earth Syst. Sci. Data, 13, 1385–1401, <ext-link xlink:href="https://doi.org/10.5194/essd-13-1385-2021" ext-link-type="DOI">10.5194/essd-13-1385-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bib89"><label>89</label><?label 1?><mixed-citation>Zhang, Y., Liang, S., Ma, H., He, T., Wang, Q., and Li, B.: A global 1 km
surface soil moisture product from 2000 to 2020, Zenodo [data set],
<ext-link xlink:href="https://doi.org/10.5281/ZENODO.7172664" ext-link-type="DOI">10.5281/ZENODO.7172664</ext-link>, 2022a.</mixed-citation></ref>
      <ref id="bib1.bib90"><label>90</label><?label 1?><mixed-citation>Zhang, Y., Liang, S., Zhu, Z., Ma, H., and He, T.: Soil moisture content
retrieval from Landsat 8 data using ensemble learning, ISPRS J. Photogramm., 185, 32–47,
<ext-link xlink:href="https://doi.org/10.1016/j.isprsjprs.2022.01.005" ext-link-type="DOI">10.1016/j.isprsjprs.2022.01.005</ext-link>, 2022b.</mixed-citation></ref>
      <ref id="bib1.bib91"><label>91</label><?label 1?><mixed-citation>Zhao, T., Shi, J., Entekhabi, D., Jackson, T. J., Hu, L., Peng, Z., Yao, P.,
Li, S., and Kang, C. S.: Retrievals of soil moisture and vegetation optical
depth using a multi-channel collaborative algorithm, Remote Sens. Environ.,
257, 112321, <ext-link xlink:href="https://doi.org/10.1016/j.rse.2021.112321" ext-link-type="DOI">10.1016/j.rse.2021.112321</ext-link>,
2021.</mixed-citation></ref>
      <ref id="bib1.bib92"><label>92</label><?label 1?><mixed-citation>Zheng, C., Jia, L., and Zhao, T.: A 21-year dataset (2000–2020) of gap-free
global daily surface soil moisture at 1 km grid resolution, Sci. Data, 10,
139, <ext-link xlink:href="https://doi.org/10.1038/s41597-023-01991-w" ext-link-type="DOI">10.1038/s41597-023-01991-w</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bib93"><label>93</label><?label 1?><mixed-citation>Zheng, J., Zhao, T., Lü, H., Shi, J., Cosh, M. H., Ji, D., Jiang, L.,
Cui, Q., Lu, H., Yang, K., Wigneron, J.-P., Li, X., Zhu, Y., Hu, L., Peng,
Z., Zeng, Y., Wang, X., and Kang, C. S.: Assessment of 24 soil moisture
datasets using a new in situ network in the Shandian River Basin of China,
Remote Sens. Environ., 271, 112891,
<ext-link xlink:href="https://doi.org/10.1016/j.rse.2022.112891" ext-link-type="DOI">10.1016/j.rse.2022.112891</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bib94"><label>94</label><?label 1?><mixed-citation>Zhou, Z.-H.: Ensemble Learning, in: Machine Learning, Springer Singapore,
Singapore, 181–210,
<ext-link xlink:href="https://doi.org/10.1007/978-981-15-1967-3_8" ext-link-type="DOI">10.1007/978-981-15-1967-3_8</ext-link>, 2021.</mixed-citation></ref>

  </ref-list></back>
    <!--<article-title-html>Generation of global 1&thinsp;km daily soil moisture product from 2000 to 2020 using ensemble learning</article-title-html>
<abstract-html/>
<ref-html id="bib1.bib1"><label>1</label><mixed-citation>
      
Abbaszadeh, P., Moradkhani, H., and Zhan, X.: Downscaling SMAP radiometer
soil moisture over the CONUS using an ensemble learning method, Water
Resour. Res., 55, 324–344, <a href="https://doi.org/10.1029/2018WR023354" target="_blank">https://doi.org/10.1029/2018WR023354</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib2"><label>2</label><mixed-citation>
      Al Bitar, A., Mialon, A., Kerr, Y. H., Cabot, F., Richaume, P., Jacquette, E., Quesney, A., Mahmoodi, A., Tarot, S., Parrens, M., Al-Yaari, A., Pellarin, T., Rodriguez-Fernandez, N., and Wigneron, J.-P.: The global SMOS Level 3 daily soil moisture and brightness temperature maps, Earth Syst. Sci. Data, 9, 293–315, <a href="https://doi.org/10.5194/essd-9-293-2017" target="_blank">https://doi.org/10.5194/essd-9-293-2017</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib3"><label>3</label><mixed-citation>
      Al-Yaari, A., Wigneron, J.-P., Dorigo, W., Colliander, A., Pellarin, T.,
Hahn, S., Mialon, A., Richaume, P., Fernandez-Moran, R., Fan, L., Kerr, Y.
H., and De Lannoy, G.: Assessment and inter-comparison of recently
developed/reprocessed microwave satellite soil moisture products using ISMN
ground-based measurements, Remote Sens. Environ., 224, 289–303,
<a href="https://doi.org/10.1016/j.rse.2019.02.008" target="_blank">https://doi.org/10.1016/j.rse.2019.02.008</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib4"><label>4</label><mixed-citation>
      Anderson, W. B., Zaitchik, B. F., Hain, C. R., Anderson, M. C., Yilmaz, M. T., Mecikalski, J., and Schultz, L.: Towards an integrated soil moisture drought monitor for East Africa, Hydrol. Earth Syst. Sci., 16, 2893–2913, <a href="https://doi.org/10.5194/hess-16-2893-2012" target="_blank">https://doi.org/10.5194/hess-16-2893-2012</a>, 2012.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib5"><label>5</label><mixed-citation>
      Babaeian, E., Sadeghi, M., Jones, S. B., Montzka, C., Vereecken, H., and
Tuller, M.: Ground, Proximal, and Satellite Remote Sensing of Soil Moisture,
Rev. Geophys., 57, 530–616, <a href="https://doi.org/10.1029/2018RG000618" target="_blank">https://doi.org/10.1029/2018RG000618</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib6"><label>6</label><mixed-citation>
      Balenzano, A., Mattia, F., Satalino, G., Lovergine, F. P., Palmisano, D.,
and Davidson, M. W. J.: Dataset of Sentinel-1 surface soil moisture time
series at 1&thinsp;km resolution over Southern Italy, Data Br., 38, 107345,
<a href="https://doi.org/10.1016/J.DIB.2021.107345" target="_blank">https://doi.org/10.1016/J.DIB.2021.107345</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib7"><label>7</label><mixed-citation>
      Bartalis, Z., Wagner, W., Naeimi, V., Hasenauer, S., Scipal, K., Bonekamp,
H., Figa, J., and Anderson, C.: Initial soil moisture retrievals from the
METOP-A Advanced Scatterometer (ASCAT), Geophys. Res. Lett., 34, L20401,
<a href="https://doi.org/10.1029/2007GL031088" target="_blank">https://doi.org/10.1029/2007GL031088</a>, 2007.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib8"><label>8</label><mixed-citation>
      Bauer-Marschallinger, B., Freeman, V., Cao, S., Paulik, C., Schaufler, S.,
Stachl, T., Modanesi, S., Massari, C., Ciabatta, L., Brocca, L., and Wagner,
W.: Toward Global Soil Moisture Monitoring With Sentinel-1: Harnessing
Assets and Overcoming Obstacles, IEEE T. Geosci. Remote, 57,
520–539, <a href="https://doi.org/10.1109/TGRS.2018.2858004" target="_blank">https://doi.org/10.1109/TGRS.2018.2858004</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib9"><label>9</label><mixed-citation>
      Beaudoing, H. and Rodell, M.: GLDAS Noah Land Surface Model L4 3 hourly 0.25
x 0.25 degree V2.1, Goddard Earth Sciences Data and Information Services
Center [data set], <a href="https://doi.org/10.5067/E7TYRXPJKWOQ" target="_blank">https://doi.org/10.5067/E7TYRXPJKWOQ</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib10"><label>10</label><mixed-citation>
      Belgiu, M. and Drăguţ, L.: Random forest in remote sensing: A review
of applications and future directions, ISPRS J. Photogramm., 114, 24–31,
<a href="https://doi.org/10.1016/j.isprsjprs.2016.01.011" target="_blank">https://doi.org/10.1016/j.isprsjprs.2016.01.011</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib11"><label>11</label><mixed-citation>
      Berg, A. and Sheffield, J.: Climate change and drought: the soil moisture
perspective, Current Climate Change Reports, 4, 180–191,
<a href="https://doi.org/10.1007/s40641-018-0095-0" target="_blank">https://doi.org/10.1007/s40641-018-0095-0</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib12"><label>12</label><mixed-citation>
      Bindlish, R., Jackson, T., Sun, R., Cosh, M., Yueh, S., and Dinardo, S.:
Combined Passive and Active Microwave Observations of Soil Moisture During
CLASIC, IEEE Geosci. Remote S., 6, 644–648,
<a href="https://doi.org/10.1109/LGRS.2009.2028441" target="_blank">https://doi.org/10.1109/LGRS.2009.2028441</a>, 2009.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib13"><label>13</label><mixed-citation>
      Breiman, L.: Random forests, Mach. Learn., 45, 5–32,
<a href="https://doi.org/10.1023/A:1010933404324" target="_blank">https://doi.org/10.1023/A:1010933404324</a>, 2001.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib14"><label>14</label><mixed-citation>
      Brocca, L., Ciabatta, L., Massari, C., Camici, S., and Tarpanelli, A.: Soil
Moisture for Hydrological Applications: Open Questions and New
Opportunities, Water, 9, 140, <a href="https://doi.org/10.3390/w9020140" target="_blank">https://doi.org/10.3390/w9020140</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib15"><label>15</label><mixed-citation>
      Brocca, L., Filippucci, P., Hahn, S., Ciabatta, L., Massari, C., Camici, S., Schüller, L., Bojkov, B., and Wagner, W.: SM2RAIN–ASCAT (2007–2018): global daily satellite rainfall data from ASCAT soil moisture observations, Earth Syst. Sci. Data, 11, 1583–1601, <a href="https://doi.org/10.5194/essd-11-1583-2019" target="_blank">https://doi.org/10.5194/essd-11-1583-2019</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib16"><label>16</label><mixed-citation>
      Chan, S. K., Bindlish, R., O'Neill, P. E., Njoku, E., Jackson, T.,
Colliander, A., Chen, F., Burgin, M., Dunbar, S., Piepmeier, J., Yueh, S.,
Entekhabi, D., Cosh, M. H., Caldwell, T., Walker, J., Wu, X., Berg, A.,
Rowlandson, T., Pacheco, A., McNairn, H., Thibeault, M., Martínez, J.,
González, Á., Seyfried, M., Bosch, D., Starks, P., Goodrich, D.,
Prueger, J., Palecki, M., Small, E. E., Zreda, M., Calvet, J., Crow, W. T.,
and Kerr, Y.: Assessment of the SMAP passive soil moisture product, IEEE
T. Geosci. Remote, 54, 4994–5007,
<a href="https://doi.org/10.1109/TGRS.2016.2561938" target="_blank">https://doi.org/10.1109/TGRS.2016.2561938</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib17"><label>17</label><mixed-citation>
      Chan, S. K., Bindlish, R., O'Neill, P., Jackson, T., Njoku, E., Dunbar, S.,
Chaubell, J., Piepmeier, J., Yueh, S., Entekhabi, D., Colliander, A., Chen,
F., Cosh, M. H., Caldwell, T., Walker, J., Berg, A., McNairn, H., Thibeault,
M., Martínez-Fernández, J., Uldall, F., Seyfried, M., Bosch, D.,
Starks, P., Holifield Collins, C., Prueger, J., van der Velde, R., Asanuma,
J., Palecki, M., Small, E. E., Zreda, M., Calvet, J., Crow, W. T., and Kerr,
Y.: Development and assessment of the SMAP enhanced passive soil moisture
product, Remote Sens. Environ., 204, 931–941,
<a href="https://doi.org/10.1016/j.rse.2017.08.025" target="_blank">https://doi.org/10.1016/j.rse.2017.08.025</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib18"><label>18</label><mixed-citation>
      Chen, T. and Guestrin, C.: XGBoost: A Scalable Tree Boosting System, in:
Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge
Discovery and Data Mining, 785–794,
<a href="https://doi.org/10.1145/2939672.2939785" target="_blank">https://doi.org/10.1145/2939672.2939785</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib19"><label>19</label><mixed-citation>
      Colliander, A., Jackson, T. J., Bindlish, R., Chan, S., Das, N., Kim, S. B.,
Cosh, M. H., Dunbar, R. S., Dang, L., Pashaian, L., Asanuma, J., Aida, K.,
Berg, A., Rowlandson, T., Bosch, D., Caldwell, T., Caylor, K., Goodrich, D.,
al Jassar, H., Lopez-Baeza, E., Martínez-Fernández, J.,
González-Zamora, A., Livingston, S., McNairn, H., Pacheco, A.,
Moghaddam, M., Montzka, C., Notarnicola, C., Niedrist, G., Pellarin, T.,
Prueger, J., Pulliainen, J., Rautiainen, K., Ramos, J., Seyfried, M.,
Starks, P., Su, Z., Zeng, Y., van der Velde, R., Thibeault, M., Dorigo, W.,
Vreugdenhil, M., Walker, J. P., Wu, X., Monerris, A., O'Neill, P. E.,
Entekhabi, D., Njoku, E. G., and Yueh, S.: Validation of SMAP surface soil
moisture products with core validation sites, Remote Sens. Environ., 191,
215–231, <a href="https://doi.org/10.1016/j.rse.2017.01.021" target="_blank">https://doi.org/10.1016/j.rse.2017.01.021</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib20"><label>20</label><mixed-citation>
      Crow, W. T., Berg, A. A., Cosh, M. H., Loew, A., Mohanty, B. P., Panciera,
R., De Rosnay, P., Ryu, D., and Walker, J. P.: Upscaling sparse ground-based
soil moisture observations for the validation of coarse-resolution satellite
soil moisture products, Rev. Geophys., 50, 1–20,
<a href="https://doi.org/10.1029/2011RG000372" target="_blank">https://doi.org/10.1029/2011RG000372</a>, 2012.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib21"><label>21</label><mixed-citation>
      Crow, W. T., Chen, F., and Colliander, A.: Benchmarking downscaled
satellite-based soil moisture products using sparse, point-scale ground
observations, Remote Sens. Environ., 283, 113300,
<a href="https://doi.org/10.1016/j.rse.2022.113300" target="_blank">https://doi.org/10.1016/j.rse.2022.113300</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib22"><label>22</label><mixed-citation>
      Cui, D., Liang, S., Wang, D., and Liu, Z.: A 1 km global dataset of historical (1979–2013) and future (2020–2100) Köppen–Geiger climate classification and bioclimatic variables, Earth Syst. Sci. Data, 13, 5087–5114, <a href="https://doi.org/10.5194/essd-13-5087-2021" target="_blank">https://doi.org/10.5194/essd-13-5087-2021</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib23"><label>23</label><mixed-citation>
      Dai, Y., Shangguan, W., Wei, N., Xin, Q., Yuan, H., Zhang, S., Liu, S., Lu, X., Wang, D., and Yan, F.: A review of the global soil property maps for Earth system models, SOIL, 5, 137–158, <a href="https://doi.org/10.5194/soil-5-137-2019" target="_blank">https://doi.org/10.5194/soil-5-137-2019</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib24"><label>24</label><mixed-citation>
      Das, N. N., Entekhabi, D., Dunbar, R. S., Chaubell, M. J., Colliander, A.,
Yueh, S., Jagdhuber, T., Chen, F., Crow, W., O'Neill, P. E., Walker, J. P.,
Berg, A., Bosch, D. D., Caldwell, T., Cosh, M. H., Collins, C. H.,
Lopez-Baeza, E., and Thibeault, M.: The SMAP and Copernicus Sentinel 1A/B
microwave active-passive high resolution surface soil moisture product,
Remote Sens. Environ., 233, 111380,
<a href="https://doi.org/10.1016/J.RSE.2019.111380" target="_blank">https://doi.org/10.1016/J.RSE.2019.111380</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib25"><label>25</label><mixed-citation>
      Dorigo, W., Wagner, W., Albergel, C., Albrecht, F., Balsamo, G., Brocca, L.,
Chung, D., Ertl, M., Forkel, M., Gruber, A., Haas, E., Hamer, P. D.,
Hirschi, M., Ikonen, J., de Jeu, R., Kidd, R., Lahoz, W., Liu, Y. Y.,
Miralles, D., Mistelbauer, T., Nicolai-Shaw, N., Parinussa, R., Pratola, C.,
Reimer, C., van der Schalie, R., Seneviratne, S. I., Smolander, T., and
Lecomte, P.: ESA CCI Soil Moisture for improved Earth system understanding:
State-of-the art and future directions, Remote Sens. Environ., 203,
185–215, <a href="https://doi.org/10.1016/J.RSE.2017.07.001" target="_blank">https://doi.org/10.1016/J.RSE.2017.07.001</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib26"><label>26</label><mixed-citation>
      Dorigo, W., Himmelbauer, I., Aberer, D., Schremmer, L., Petrakovic, I., Zappa, L., Preimesberger, W., Xaver, A., Annor, F., Ardö, J., Baldocchi, D., Bitelli, M., Blöschl, G., Bogena, H., Brocca, L., Calvet, J.-C., Camarero, J. J., Capello, G., Choi, M., Cosh, M. C., van de Giesen, N., Hajdu, I., Ikonen, J., Jensen, K. H., Kanniah, K. D., de Kat, I., Kirchengast, G., Kumar Rai, P., Kyrouac, J., Larson, K., Liu, S., Loew, A., Moghaddam, M., Martínez Fernández, J., Mattar Bader, C., Morbidelli, R., Musial, J. P., Osenga, E., Palecki, M. A., Pellarin, T., Petropoulos, G. P., Pfeil, I., Powers, J., Robock, A., Rüdiger, C., Rummel, U., Strobel, M., Su, Z., Sullivan, R., Tagesson, T., Varlagin, A., Vreugdenhil, M., Walker, J., Wen, J., Wenger, F., Wigneron, J. P., Woods, M., Yang, K., Zeng, Y., Zhang, X., Zreda, M., Dietrich, S., Gruber, A., van Oevelen, P., Wagner, W., Scipal, K., Drusch, M., and Sabia, R.: The International Soil Moisture Network: serving Earth system science for over a decade, Hydrol. Earth Syst. Sci., 25, 5749–5804, <a href="https://doi.org/10.5194/hess-25-5749-2021" target="_blank">https://doi.org/10.5194/hess-25-5749-2021</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib27"><label>27</label><mixed-citation>
      Dorigo, W. A., Xaver, A., Vreugdenhil, M., Gruber, A., Hegyiová, A.,
Sanchis-Dufau, A. D., Zamojski, D., Cordes, C., Wagner, W., and Drusch, M.:
Global Automated Quality Control of In Situ Soil Moisture Data from the
International Soil Moisture Network, Vadose Zone J., 12, vzj2012.0097,
<a href="https://doi.org/10.2136/vzj2012.0097" target="_blank">https://doi.org/10.2136/vzj2012.0097</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib28"><label>28</label><mixed-citation>
      Entekhabi, D., Reichle, R. H., Koster, R. D., and Crow, W. T.: Performance
metrics for soil moisture retrievals and application requirements, J.
Hydrometeorol., 11, 832–840, <a href="https://doi.org/10.1175/2010JHM1223.1" target="_blank">https://doi.org/10.1175/2010JHM1223.1</a>, 2010.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib29"><label>29</label><mixed-citation>
      Friedl, M. and Sulla-Menashe, D.: MCD12Q1 MODIS/Terra+Aqua Land Cover Type
Yearly L3 Global 500m SIN Grid V006, NASA EOSDIS Land Processes DAAC [data
set], <a href="https://doi.org/10.5067/MODIS/MCD12Q1.006" target="_blank">https://doi.org/10.5067/MODIS/MCD12Q1.006</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib30"><label>30</label><mixed-citation>
      Friedman, J. H.: Greedy function approximation: a gradient boosting machine,
Ann. Stat., 29, 1189–1232, <a href="https://doi.org/10.1214/aos/1013203451" target="_blank">https://doi.org/10.1214/aos/1013203451</a>, 2001.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib31"><label>31</label><mixed-citation>
      Ghulam, A., Qin, Q., Teyip, T., and Li, Z.-L.: Modified perpendicular
drought index (MPDI): a real-time drought monitoring method, ISPRS J.
Photogramm., 62, 150–164,
<a href="https://doi.org/10.1016/j.isprsjprs.2007.03.002" target="_blank">https://doi.org/10.1016/j.isprsjprs.2007.03.002</a>, 2007.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib32"><label>32</label><mixed-citation>
      Gislason, P. O., Benediktsson, J. A., and Sveinsson, J. R.: Random Forests
for land cover classification, Pattern Recognit. Lett., 27, 294–300,
<a href="https://doi.org/10.1016/j.patrec.2005.08.011" target="_blank">https://doi.org/10.1016/j.patrec.2005.08.011</a>, 2006.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib33"><label>33</label><mixed-citation>
      Gruber, A., Dorigo, W. A., Zwieback, S., Xaver, A., and Wagner, W.:
Characterizing Coarse-Scale Representativeness of in situ Soil Moisture
Measurements from the International Soil Moisture Network, Vadose Zone J.,
12, vzj2012.0170, <a href="https://doi.org/10.2136/vzj2012.0170" target="_blank">https://doi.org/10.2136/vzj2012.0170</a>,
2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib34"><label>34</label><mixed-citation>
      Gruber, A., Su, C.-H., Zwieback, S., Crow, W., Dorigo, W., and Wagner, W.:
Recent advances in (soil moisture) triple collocation analysis, Int. J.
Appl. Earth Obs., 45, 200–211,
<a href="https://doi.org/10.1016/j.jag.2015.09.002" target="_blank">https://doi.org/10.1016/j.jag.2015.09.002</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib35"><label>35</label><mixed-citation>
      
Gruber, A., Scanlon, T., van der Schalie, R., Wagner, W., and Dorigo, W.: Evolution of the ESA CCI Soil Moisture climate data records and their underlying merging methodology, Earth Syst. Sci. Data, 11, 717–739, <a href="https://doi.org/10.5194/essd-11-717-2019" target="_blank">https://doi.org/10.5194/essd-11-717-2019</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib36"><label>36</label><mixed-citation>
      Gruber, A., De Lannoy, G., Albergel, C., Al-Yaari, A., Brocca, L., Calvet,
J.-C., Colliander, A., Cosh, M., Crow, W., Dorigo, W., Draper, C., Hirschi,
M., Kerr, Y., Konings, A., Lahoz, W., McColl, K., Montzka, C.,
Muñoz-Sabater, J., Peng, J., Reichle, R., Richaume, P., Rüdiger, C.,
Scanlon, T., van der Schalie, R., Wigneron, J.-P., and Wagner, W.:
Validation practices for satellite soil moisture retrievals: What are (the)
errors?, Remote Sens. Environ., 244, 111806,
<a href="https://doi.org/10.1016/j.rse.2020.111806" target="_blank">https://doi.org/10.1016/j.rse.2020.111806</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib37"><label>37</label><mixed-citation>
      Hersbach, H., Bell, B., Berrisford, P., Hirahara, S., Horányi, A.,
Muñoz-Sabater, J., Nicolas, J., Peubey, C., Radu, R., Schepers, D.,
Simmons, A., Soci, C., Abdalla, S., Abellan, X., Balsamo, G., Bechtold, P.,
Biavati, G., Bidlot, J., Bonavita, M., De Chiara, G., Dahlgren, P., Dee, D.,
Diamantakis, M., Dragani, R., Flemming, J., Forbes, R., Fuentes, M., Geer,
A., Haimberger, L., Healy, S., Hogan, R. J., Hólm, E., Janisková,
M., Keeley, S., Laloyaux, P., Lopez, P., Lupu, C., Radnoti, G., de Rosnay,
P., Rozum, I., Vamborg, F., Villaume, S., and Thépaut, J.-N.: The ERA5
global reanalysis, Q. J. Roy. Meteor. Soc., 146, 1999–2049,
<a href="https://doi.org/10.1002/qj.3803" target="_blank">https://doi.org/10.1002/qj.3803</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib38"><label>38</label><mixed-citation>
      Holzman, M. E., Rivas, R., and Piccolo, M. C.: Estimating soil moisture and
the relationship with crop yield using surface temperature and vegetation
index, Int. J. Appl. Earth Obs., 28, 181–192,
<a href="https://doi.org/10.1016/j.jag.2013.12.006" target="_blank">https://doi.org/10.1016/j.jag.2013.12.006</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib39"><label>39</label><mixed-citation>
      Humphrey, V., Berg, A., Ciais, P., Gentine, P., Jung, M., Reichstein, M.,
Seneviratne, S. I., and Frankenberg, C.: Soil moisture–atmosphere feedback
dominates land carbon uptake variability, Nature, 592, 65–69,
<a href="https://doi.org/10.1038/s41586-021-03325-5" target="_blank">https://doi.org/10.1038/s41586-021-03325-5</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib40"><label>40</label><mixed-citation>
      Karthikeyan, L. and Mishra, A. K.: Multi-layer high-resolution soil moisture
estimation using machine learning over the United States, Remote Sens.
Environ., 266, 112706, <a href="https://doi.org/10.1016/J.RSE.2021.112706" target="_blank">https://doi.org/10.1016/J.RSE.2021.112706</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib41"><label>41</label><mixed-citation>
      Kerr, Y. H., Al-Yaari, A., Rodriguez-Fernandez, N., Parrens, M., Molero, B.,
Leroux, D., Bircher, S., Mahmoodi, A., Mialon, A., Richaume, P., Delwart,
S., Al Bitar, A., Pellarin, T., Bindlish, R., Jackson, T. J., Rüdiger,
C., Waldteufel, P., Mecklenburg, S., and Wigneron, J. P.: Overview of SMOS
performance in terms of global soil moisture monitoring after six years in
operation, Remote Sens. Environ., 180, 40–63,
<a href="https://doi.org/10.1016/j.rse.2016.02.042" target="_blank">https://doi.org/10.1016/j.rse.2016.02.042</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib42"><label>42</label><mixed-citation>
      Kim, S., Zhang, R., Pham, H., and Sharma, A.: A Review of Satellite-Derived
Soil Moisture and Its Usage for Flood Estimation, Remote Sens. Earth Syst.
Sci., 2, 225–246, <a href="https://doi.org/10.1007/s41976-019-00025-7" target="_blank">https://doi.org/10.1007/s41976-019-00025-7</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib43"><label>43</label><mixed-citation>
      Li, B., Liang, S., Liu, X., Ma, H., Chen, Y., Liang, T., and He, T.:
Estimation of all-sky 1 km land surface temperature over the conterminous
United States, Remote Sens. Environ., 266, 112707,
<a href="https://doi.org/10.1016/J.RSE.2021.112707" target="_blank">https://doi.org/10.1016/J.RSE.2021.112707</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib44"><label>44</label><mixed-citation>
      Li, X., Wigneron, J.-P., Fan, L., Frappart, F., Yueh, S. H., Colliander, A.,
Ebtehaj, A., Gao, L., Fernandez-Moran, R., Liu, X., Wang, M., Ma, H., Moisy,
C., and Ciais, P.: A new SMAP soil moisture and vegetation optical depth
product (SMAP-IB): Algorithm, assessment and inter-comparison, Remote Sens.
Environ., 271, 112921,
<a href="https://doi.org/10.1016/j.rse.2022.112921" target="_blank">https://doi.org/10.1016/j.rse.2022.112921</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib45"><label>45</label><mixed-citation>
      Liang, S. and Wang, J. (Eds.): Chapter 18 – Soil moisture contents, in:
Advanced Remote Sensing, 2nd Edn., Academic Press, 685–711,
<a href="https://doi.org/10.1016/B978-0-12-815826-5.00018-0" target="_blank">https://doi.org/10.1016/B978-0-12-815826-5.00018-0</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib46"><label>46</label><mixed-citation>
      Liang, S., Cheng, J., Jia, K., Jiang, B., Liu, Q., Xiao, Z., Yao, Y., Yuan,
W., Zhang, X., Zhao, X., and Zhou, J.: The Global Land Surface Satellite
(GLASS) Product Suite, B. Am. Meteorol. Soc., 102, E323–E337,
<a href="https://doi.org/10.1175/BAMS-D-18-0341.1" target="_blank">https://doi.org/10.1175/BAMS-D-18-0341.1</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib47"><label>47</label><mixed-citation>
      Liu, L., Gudmundsson, L., Hauser, M., Qin, D., Li, S., and Seneviratne, S.
I.: Soil moisture dominates dryness stress on ecosystem production globally,
Nat. Commun., 11, 4892, <a href="https://doi.org/10.1038/s41467-020-18631-1" target="_blank">https://doi.org/10.1038/s41467-020-18631-1</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib48"><label>48</label><mixed-citation>
       Liu, N. F., Liu, Q., Wang, L. Z., Liang, S. L., Wen, J. G., Qu, Y., and Liu, S. H.: A statistics-based temporal filter algorithm to map spatiotemporally continuous shortwave albedo from MODIS data, Hydrol. Earth Syst. Sci., 17, 2121–2129, <a href="https://doi.org/10.5194/hess-17-2121-2013" target="_blank">https://doi.org/10.5194/hess-17-2121-2013</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib49"><label>49</label><mixed-citation>
      Long, D., Bai, L., Yan, L., Zhang, C., Yang, W., Lei, H., Quan, J., Meng,
X., and Shi, C.: Generation of spatially complete and daily continuous
surface soil moisture of high spatial resolution, Remote Sens. Environ.,
233, 111364, <a href="https://doi.org/10.1016/j.rse.2019.111364" target="_blank">https://doi.org/10.1016/j.rse.2019.111364</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib50"><label>50</label><mixed-citation>
      Luo, P., Song, Y., Huang, X., Ma, H., Liu, J., Yao, Y., and Meng, L.:
Identifying determinants of spatio-temporal disparities in soil moisture of
the Northern Hemisphere using a geographically optimal zones-based
heterogeneity model, ISPRS J. Photogramm., 185, 111–128,
<a href="https://doi.org/10.1016/j.isprsjprs.2022.01.009" target="_blank">https://doi.org/10.1016/j.isprsjprs.2022.01.009</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib51"><label>51</label><mixed-citation>
      Ma, H. and Liang, S.: Development of the GLASS 250-m leaf area index product
(version 6) from MODIS data using the bidirectional LSTM deep learning
model, Remote Sens. Environ., 273, 112985,
<a href="https://doi.org/10.1016/J.RSE.2022.112985" target="_blank">https://doi.org/10.1016/J.RSE.2022.112985</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib52"><label>52</label><mixed-citation>
      Ma, H., Zeng, J., Zhang, X., Fu, P., Zheng, D., Wigneron, J.-P., Chen, N.,
and Niyogi, D.: Evaluation of six satellite- and model-based surface soil
temperature datasets using global ground-based observations, Remote Sens.
Environ., 264, 112605,
<a href="https://doi.org/10.1016/j.rse.2021.112605" target="_blank">https://doi.org/10.1016/j.rse.2021.112605</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib53"><label>53</label><mixed-citation>
      McColl, K. A., Vogelzang, J., Konings, A. G., Entekhabi, D., Piles, M., and
Stoffelen, A.: Extended triple collocation: Estimating errors and
correlation coefficients with respect to an unknown target, Geophys. Res.
Lett., 41, 6229–6236, <a href="https://doi.org/10.1002/2014GL061322" target="_blank">https://doi.org/10.1002/2014GL061322</a>,
2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib54"><label>54</label><mixed-citation>
      Molero, B., Leroux, D. J., Richaume, P., Kerr, Y. H., Merlin, O., Cosh, M.
H., and Bindlish, R.: Multi-Timescale Analysis of the Spatial
Representativeness of In Situ Soil Moisture Data within Satellite
Footprints, J. Geophys. Res.-Atmos., 123, 3–21,
<a href="https://doi.org/10.1002/2017JD027478" target="_blank">https://doi.org/10.1002/2017JD027478</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib55"><label>55</label><mixed-citation>
      Muñoz-Sabater, J.: ERA5-Land hourly data from 1981 to present,
Copernicus Climate Change Service (C3S) Climate Data Store (CDS) [data set],
<a href="https://doi.org/10.24381/cds.e2161bac" target="_blank">https://doi.org/10.24381/cds.e2161bac</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib56"><label>56</label><mixed-citation>
      Muñoz-Sabater, J.: ERA5-Land hourly data from 1950 to 1980, Copernicus
Climate Change Service (C3S) Climate Data Store (CDS) [data set],
<a href="https://doi.org/10.24381/cds.e2161bac" target="_blank">https://doi.org/10.24381/cds.e2161bac</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib57"><label>57</label><mixed-citation>
      Muñoz-Sabater, J., Dutra, E., Agustí-Panareda, A., Albergel, C., Arduini, G., Balsamo, G., Boussetta, S., Choulga, M., Harrigan, S., Hersbach, H., Martens, B., Miralles, D. G., Piles, M., Rodríguez-Fernández, N. J., Zsoter, E., Buontempo, C., and Thépaut, J.-N.: ERA5-Land: a state-of-the-art global reanalysis dataset for land applications, Earth Syst. Sci. Data, 13, 4349–4383, <a href="https://doi.org/10.5194/essd-13-4349-2021" target="_blank">https://doi.org/10.5194/essd-13-4349-2021</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib58"><label>58</label><mixed-citation>
      Naz, B. S., Kollet, S., Franssen, H.-J. H., Montzka, C., and Kurtz, W.: A
3&thinsp;km spatially and temporally consistent European daily soil moisture
reanalysis from 2000 to 2015, Sci. Data, 7, 111,
<a href="https://doi.org/10.1038/s41597-020-0450-6" target="_blank">https://doi.org/10.1038/s41597-020-0450-6</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib59"><label>59</label><mixed-citation>
      Njoku, E. G., Jackson, T. J., Lakshmi, V., Chan, T. K., and Nghiem, S. V.:
Soil moisture retrieval from AMSR-E, IEEE T. Geosci. Remote, 41,
215–229, <a href="https://doi.org/10.1109/TGRS.2002.808243" target="_blank">https://doi.org/10.1109/TGRS.2002.808243</a>, 2003.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib60"><label>60</label><mixed-citation>
      O'Neill, P. E., Chan, S., Njoku, E. G., Jackson, T., Bindlish, R., and
Chaubell, J.: SMAP L3 Radiometer Global Daily 36&thinsp;km EASE-Grid Soil Moisture,
Version 8, NASA National Snow and Ice Data Center Distributed Active Archive
Center [data set], <a href="https://doi.org/10.5067/OMHVSRGFX38O" target="_blank">https://doi.org/10.5067/OMHVSRGFX38O</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib61"><label>61</label><mixed-citation>
      Owe, M., de Jeu, R., and Holmes, T.: Multisensor historical climatology of
satellite-derived global land surface moisture, J. Geophys. Res.-Earth, 113, F01002, <a href="https://doi.org/10.1029/2007JF000769" target="_blank">https://doi.org/10.1029/2007JF000769</a>, 2008.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib62"><label>62</label><mixed-citation>
      Peng, J., Loew, A., Merlin, O., and Verhoest, N. E. C.: A review of spatial
downscaling of satellite remotely sensed soil moisture, Rev. Geophys., 55,
341–366, <a href="https://doi.org/10.1002/2016RG000543" target="_blank">https://doi.org/10.1002/2016RG000543</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib63"><label>63</label><mixed-citation>
      Peng, J., Albergel, C., Balenzano, A., Brocca, L., Cartus, O., Cosh, M. H.,
Crow, W. T., Dabrowska-Zielinska, K., Dadson, S., Davidson, M. W. J., de
Rosnay, P., Dorigo, W., Gruber, A., Hagemann, S., Hirschi, M., Kerr, Y. H.,
Lovergine, F., Mahecha, M. D., Marzahn, P., Mattia, F., Musial, J. P.,
Preuschmann, S., Reichle, R. H., Satalino, G., Silgram, M., van Bodegom, P.
M., Verhoest, N. E. C., Wagner, W., Walker, J. P., Wegmüller, U., and
Loew, A.: A roadmap for high-resolution satellite soil moisture applications
– confronting product characteristics with user requirements, Remote Sens.
Environ., 252, 112162, <a href="https://doi.org/10.1016/J.RSE.2020.112162" target="_blank">https://doi.org/10.1016/J.RSE.2020.112162</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib64"><label>64</label><mixed-citation>
      
Poggio, L., de Sousa, L. M., Batjes, N. H., Heuvelink, G. B. M., Kempen, B., Ribeiro, E., and Rossiter, D.: SoilGrids 2.0: producing soil information for the globe with quantified spatial uncertainty, SOIL, 7, 217–240, <a href="https://doi.org/10.5194/soil-7-217-2021" target="_blank">https://doi.org/10.5194/soil-7-217-2021</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib65"><label>65</label><mixed-citation>
      Qu, Y., Liu, Q., Liang, S., Wang, L., Liu, N., and Liu, S.:
Direct-Estimation Algorithm for Mapping Daily Land-Surface Broadband Albedo
From MODIS Data, IEEE T. Geosci. Remote, 52, 907–919,
<a href="https://doi.org/10.1109/TGRS.2013.2245670" target="_blank">https://doi.org/10.1109/TGRS.2013.2245670</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib66"><label>66</label><mixed-citation>
      Rahimzadeh-Bajgiran, P., Berg, A. A., Champagne, C., and Omasa, K.:
Estimation of soil moisture using optical/thermal infrared remote sensing in
the Canadian Prairies, ISPRS J. Photogramm., 83, 94–103,
<a href="https://doi.org/10.1016/j.isprsjprs.2013.06.004" target="_blank">https://doi.org/10.1016/j.isprsjprs.2013.06.004</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib67"><label>67</label><mixed-citation>
      Rodell, M., Houser, P. R., Jambor, U., Gottschalck, J., Mitchell, K., Meng,
C. J., Arsenault, K., Cosgrove, B., Radakovich, J., Bosilovich, M., Entin,
J. K., Walker, J. P., Lohmann, D., and Toll, D.: The global land data
assimilation system, B. Am. Meteorol. Soc., 85, 381–394,
<a href="https://doi.org/10.1175/BAMS-85-3-381" target="_blank">https://doi.org/10.1175/BAMS-85-3-381</a>, 2004.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib68"><label>68</label><mixed-citation>
      Senyurek, V., Lei, F., Boyd, D., Kurum, M., Gurbuz, A. C., and Moorhead, R.:
Machine Learning-Based CYGNSS Soil Moisture Estimates over ISMN sites in
CONUS, Remote Sens., 12, 1168, <a href="https://doi.org/10.3390/rs12071168" target="_blank">https://doi.org/10.3390/rs12071168</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib69"><label>69</label><mixed-citation>
      Sheffield, J., Goteti, G., Wen, F., and Wood, E. F.: A simulated soil
moisture based drought analysis for the United States, J. Geophys. Res.-Atmos.,
109, 1–19, <a href="https://doi.org/10.1029/2004JD005182" target="_blank">https://doi.org/10.1029/2004JD005182</a>, 2004.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib70"><label>70</label><mixed-citation>
      Shi, J., Zhao, T., Cui, Q., and Yao, P.: Airborne and Spaceborne Passive
Microwave Measurements of Soil Moisture, in: Observation and Measurement of
Ecohydrological Processes, edited by: Li, X. and Vereecken, H., Springer
Berlin Heidelberg, Berlin, Heidelberg, 71–105,
<a href="https://doi.org/10.1007/978-3-662-48297-1_3" target="_blank">https://doi.org/10.1007/978-3-662-48297-1_3</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib71"><label>71</label><mixed-citation>
      Smith, A. B., Walker, J. P., Western, A. W., Young, R. I., Ellett, K. M.,
Pipunic, R. C., Grayson, R. B., Siriwardena, L., Chiew, F. H. S., and
Richter, H.: The Murrumbidgee soil moisture monitoring network data set,
Water Resour. Res., 48, W07701,
<a href="https://doi.org/10.1029/2012WR011976" target="_blank">https://doi.org/10.1029/2012WR011976</a>, 2012.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib72"><label>72</label><mixed-citation>
      Song, J.: Bias corrections for Random Forest in regression using residual
rotation, J. Korean Stat. Soc., 44, 321–326,
<a href="https://doi.org/10.1016/j.jkss.2015.01.003" target="_blank">https://doi.org/10.1016/j.jkss.2015.01.003</a>, 2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib73"><label>73</label><mixed-citation>
      Song, P., Zhang, Y., Guo, J., Shi, J., Zhao, T., and Tong, B.: A 1&thinsp;km daily surface soil moisture dataset of enhanced coverage under all-weather conditions over China in 2003–2019, Earth Syst. Sci. Data, 14, 2613–2637, <a href="https://doi.org/10.5194/essd-14-2613-2022" target="_blank">https://doi.org/10.5194/essd-14-2613-2022</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib74"><label>74</label><mixed-citation>
      Starks, P. J., Fiebrich, C. A., Grimsley, D. L., Garbrecht, J. D., Steiner,
J. L., Guzman, J. A., and Moriasi, D. N.: Upper Washita River Experimental
Watersheds: Meteorologic and Soil Climate Measurement Networks, J. Environ.
Qual., 43, 1239–1249,
<a href="https://doi.org/10.2134/jeq2013.08.0312" target="_blank">https://doi.org/10.2134/jeq2013.08.0312</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib75"><label>75</label><mixed-citation>
      Stoffelen, A.: Toward the true near-surface wind speed: Error modeling and
calibration using triple collocation, J. Geophys. Res.-Oceans, 103,
7755–7766, <a href="https://doi.org/10.1029/97JC03180" target="_blank">https://doi.org/10.1029/97JC03180</a>, 1998.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib76"><label>76</label><mixed-citation>
      Vergopolan, N., Chaney, N. W., Pan, M., Sheffield, J., Beck, H. E.,
Ferguson, C. R., Torres-Rojas, L., Sadri, S., and Wood, E. F.:
SMAP-HydroBlocks, a 30-m satellite-based soil moisture dataset for the
conterminous US, Sci. Data, 8, 264,
<a href="https://doi.org/10.1038/s41597-021-01050-2" target="_blank">https://doi.org/10.1038/s41597-021-01050-2</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib77"><label>77</label><mixed-citation>
      Wagner, W., Pathe, C., Doubkova, M., Sabel, D., Bartsch, A., Hasenauer, S.,
Blöschl, G., Scipal, K., Martínez-Fernández, J., and Löw,
A.: Temporal Stability of Soil Moisture and Radar Backscatter Observed by
the Advanced Synthetic Aperture Radar (ASAR), Sensors, 8, 1174–1197,
<a href="https://doi.org/10.3390/s80201174" target="_blank">https://doi.org/10.3390/s80201174</a>, 2008.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib78"><label>78</label><mixed-citation>
      Wagner, W., Hahn, S., Kidd, R., Melzer, T., Bartalis, Z., Hasenauer, S.,
Figa-Saldaña, J., de Rosnay, P., Jann, A., Schneider, S., Komma, J.,
Kubu, G., Brugger, K., Aubrecht, C., Züger, J., Gangkofner, U.,
Kienberger, S., Brocca, L., Wang, Y., Blöschl, G., Eitzinger, J., and
Steinnocher, K.: The ASCAT Soil Moisture Product: A Review of its
Specifications, Validation Results, and Emerging Applications, Meteorol.
Z., 22, 5–33, <a href="https://doi.org/10.1127/0941-2948/2013/0399" target="_blank">https://doi.org/10.1127/0941-2948/2013/0399</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib79"><label>79</label><mixed-citation>
      Wang, Y., Leng, P., Peng, J., Marzahn, P., and Ludwig, R.: Global
assessments of two blended microwave soil moisture products CCI and SMOPS
with in-situ measurements and reanalysis data, Int. J. Appl. Earth Obs., 94, 102234, <a href="https://doi.org/10.1016/J.JAG.2020.102234" target="_blank">https://doi.org/10.1016/J.JAG.2020.102234</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib80"><label>80</label><mixed-citation>
      Wigneron, J.-P., Li, X., Frappart, F., Fan, L., Al-Yaari, A., De Lannoy, G.,
Liu, X., Wang, M., Le Masson, E., and Moisy, C.: SMOS-IC data record of soil
moisture and L-VOD: Historical development, applications and perspectives,
Remote Sens. Environ., 254, 112238,
<a href="https://doi.org/10.1016/j.rse.2020.112238" target="_blank">https://doi.org/10.1016/j.rse.2020.112238</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib81"><label>81</label><mixed-citation>
      Xu, X.: Evaluation of SMAP Level 2, 3, and 4 Soil Moisture Datasets over the
Great Lakes Region, Remote Sens., 12, 3785, <a href="https://doi.org/10.3390/rs12223785" target="_blank">https://doi.org/10.3390/rs12223785</a>,
2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib82"><label>82</label><mixed-citation>
      Yamazaki, D., Ikeshima, D., Tawatari, R., Yamaguchi, T., O'Loughlin, F.,
Neal, J. C., Sampson, C. C., Kanae, S., and Bates, P. D.: A high-accuracy
map of global terrain elevations, Geophys. Res. Lett., 44, 5844–5853,
<a href="https://doi.org/10.1002/2017GL072874" target="_blank">https://doi.org/10.1002/2017GL072874</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib83"><label>83</label><mixed-citation>
      Yang, J., Zhang, P., Lu, N., Yang, Z., Shi, J., and Dong, C.: Improvements
on global meteorological observations from the current Fengyun 3 satellites
and beyond, Int. J. Digit. Earth, 5, 251–265,
<a href="https://doi.org/10.1080/17538947.2012.658666" target="_blank">https://doi.org/10.1080/17538947.2012.658666</a>, 2012.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib84"><label>84</label><mixed-citation>
      Yee, M. S., Walker, J. P., Rüdiger, C., Parinussa, R. M., Koike, T., and
Kerr, Y. H.: A comparison of SMOS and AMSR2 soil moisture using
representative sites of the OzNet monitoring network, Remote Sens. Environ.,
195, 297–312, <a href="https://doi.org/10.1016/j.rse.2017.04.019" target="_blank">https://doi.org/10.1016/j.rse.2017.04.019</a>, 2017.


    </mixed-citation></ref-html>
<ref-html id="bib1.bib85"><label>85</label><mixed-citation>
      Yuan, Q., Xu, H., Li, T., Shen, H., and Zhang, L.: Estimating surface soil
moisture from satellite observations using a generalized regression neural
network trained on sparse ground-based measurements in the continental U.S.,
J. Hydrol., 580, 124351, <a href="https://doi.org/10.1016/j.jhydrol.2019.124351" target="_blank">https://doi.org/10.1016/j.jhydrol.2019.124351</a>,
2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib86"><label>86</label><mixed-citation>
      Yue, J., Tian, J., Tian, Q., Xu, K., and Xu, N.: Development of soil
moisture indices from differences in water absorption between
shortwave-infrared bands, ISPRS J. Photogramm., 154, 216–230,
<a href="https://doi.org/10.1016/j.isprsjprs.2019.06.012" target="_blank">https://doi.org/10.1016/j.isprsjprs.2019.06.012</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib87"><label>87</label><mixed-citation>
      Zappa, L., Forkel, M., Xaver, A., and Dorigo, W.: Deriving Field Scale Soil
Moisture from Satellite Observations and Ground Measurements in a Hilly
Agricultural Region, Remote Sens., 11, 2596, <a href="https://doi.org/10.3390/rs11222596" target="_blank">https://doi.org/10.3390/rs11222596</a>,
2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib88"><label>88</label><mixed-citation>
      
Zhang, Q., Yuan, Q., Li, J., Wang, Y., Sun, F., and Zhang, L.: Generating seamless global daily AMSR2 soil moisture (SGD-SM) long-term products for the years 2013–2019, Earth Syst. Sci. Data, 13, 1385–1401, <a href="https://doi.org/10.5194/essd-13-1385-2021" target="_blank">https://doi.org/10.5194/essd-13-1385-2021</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib89"><label>89</label><mixed-citation>
      Zhang, Y., Liang, S., Ma, H., He, T., Wang, Q., and Li, B.: A global 1&thinsp;km
surface soil moisture product from 2000 to 2020, Zenodo [data set],
<a href="https://doi.org/10.5281/ZENODO.7172664" target="_blank">https://doi.org/10.5281/ZENODO.7172664</a>, 2022a.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib90"><label>90</label><mixed-citation>
      Zhang, Y., Liang, S., Zhu, Z., Ma, H., and He, T.: Soil moisture content
retrieval from Landsat 8 data using ensemble learning, ISPRS J. Photogramm., 185, 32–47,
<a href="https://doi.org/10.1016/j.isprsjprs.2022.01.005" target="_blank">https://doi.org/10.1016/j.isprsjprs.2022.01.005</a>, 2022b.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib91"><label>91</label><mixed-citation>
      Zhao, T., Shi, J., Entekhabi, D., Jackson, T. J., Hu, L., Peng, Z., Yao, P.,
Li, S., and Kang, C. S.: Retrievals of soil moisture and vegetation optical
depth using a multi-channel collaborative algorithm, Remote Sens. Environ.,
257, 112321, <a href="https://doi.org/10.1016/j.rse.2021.112321" target="_blank">https://doi.org/10.1016/j.rse.2021.112321</a>,
2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib92"><label>92</label><mixed-citation>
      Zheng, C., Jia, L., and Zhao, T.: A 21-year dataset (2000–2020) of gap-free
global daily surface soil moisture at 1&thinsp;km grid resolution, Sci. Data, 10,
139, <a href="https://doi.org/10.1038/s41597-023-01991-w" target="_blank">https://doi.org/10.1038/s41597-023-01991-w</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib93"><label>93</label><mixed-citation>
      Zheng, J., Zhao, T., Lü, H., Shi, J., Cosh, M. H., Ji, D., Jiang, L.,
Cui, Q., Lu, H., Yang, K., Wigneron, J.-P., Li, X., Zhu, Y., Hu, L., Peng,
Z., Zeng, Y., Wang, X., and Kang, C. S.: Assessment of 24 soil moisture
datasets using a new in situ network in the Shandian River Basin of China,
Remote Sens. Environ., 271, 112891,
<a href="https://doi.org/10.1016/j.rse.2022.112891" target="_blank">https://doi.org/10.1016/j.rse.2022.112891</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib94"><label>94</label><mixed-citation>
      Zhou, Z.-H.: Ensemble Learning, in: Machine Learning, Springer Singapore,
Singapore, 181–210,
<a href="https://doi.org/10.1007/978-981-15-1967-3_8" target="_blank">https://doi.org/10.1007/978-981-15-1967-3_8</a>, 2021.

    </mixed-citation></ref-html>--></article>
