<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD Journal Publishing with OASIS Tables v3.0 20080202//EN" "https://jats.nlm.nih.gov/nlm-dtd/publishing/3.0/journalpub-oasis3.dtd">
<article xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:oasis="http://docs.oasis-open.org/ns/oasis-exchange/table" xml:lang="en" dtd-version="3.0" article-type="data-paper">
  <front>
    <journal-meta><journal-id journal-id-type="publisher">ESSD</journal-id><journal-title-group>
    <journal-title>Earth System Science Data</journal-title>
    <abbrev-journal-title abbrev-type="publisher">ESSD</abbrev-journal-title><abbrev-journal-title abbrev-type="nlm-ta">Earth Syst. Sci. Data</abbrev-journal-title>
  </journal-title-group><issn pub-type="epub">1866-3516</issn><publisher>
    <publisher-name>Copernicus Publications</publisher-name>
    <publisher-loc>Göttingen, Germany</publisher-loc>
  </publisher></journal-meta>
    <article-meta>
      <article-id pub-id-type="doi">10.5194/essd-18-2769-2026</article-id><title-group><article-title>OpenSWI: a massive-scale benchmark dataset for surface wave dispersion curve inversion</article-title><alt-title>OpenSWI</alt-title>
      </title-group>
      <contrib-group>
        <contrib contrib-type="author" corresp="no" rid="aff1 aff2">
          <name><surname>Liu</surname><given-names>Feng</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0009-0006-3381-2242">https://orcid.org/0009-0006-3381-2242</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2 aff3">
          <name><surname>Zhao</surname><given-names>Sijie</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0009-0004-4007-743X">https://orcid.org/0009-0004-4007-743X</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Gu</surname><given-names>Xinyu</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0003-3434-332X">https://orcid.org/0000-0003-3434-332X</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Ling</surname><given-names>Fenghua</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Zhuang</surname><given-names>Peiqin</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-1724-8980">https://orcid.org/0000-0002-1724-8980</ext-link></contrib>
        <contrib contrib-type="author" corresp="yes" rid="aff4">
          <name><surname>Li</surname><given-names>Yaxing</given-names></name>
          <email>yxli2024@cdut.edu.cn</email>
        </contrib>
        <contrib contrib-type="author" corresp="yes" rid="aff2">
          <name><surname>Su</surname><given-names>Rui</given-names></name>
          <email>surui@pjlab.org.cn</email>
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff5">
          <name><surname>Fang</surname><given-names>Lihua</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff5">
          <name><surname>Zhou</surname><given-names>Lianqing</given-names></name>
          
        <ext-link ext-link-type="uri" xlink:href="https://orcid.org/0000-0002-5486-0159">https://orcid.org/0000-0002-5486-0159</ext-link></contrib>
        <contrib contrib-type="author" corresp="no" rid="aff4">
          <name><surname>Huang</surname><given-names>Jianping</given-names></name>
          
        </contrib>
        <contrib contrib-type="author" corresp="no" rid="aff2">
          <name><surname>Bai</surname><given-names>Lei</given-names></name>
          
        </contrib>
        <aff id="aff1"><label>1</label><institution>School of Electronic Information and Electrical Engineering, Shanghai Jiao Tong University, Shanghai 200240, China</institution>
        </aff>
        <aff id="aff2"><label>2</label><institution>Shanghai Artificial Intelligence Laboratory, Shanghai 200232, China</institution>
        </aff>
        <aff id="aff3"><label>3</label><institution>School of Geography and Ocean Science, Nanjing University, Nanjing 210023, China</institution>
        </aff>
        <aff id="aff4"><label>4</label><institution>Key Laboratory of Earth Exploration and Information Technology of Ministry of Education, Chengdu University of Technology, Chengdu 610059, China</institution>
        </aff>
        <aff id="aff5"><label>5</label><institution>Institute of Earthquake Forecasting, China Earthquake Administration, Beijing 100036, China</institution>
        </aff>
      </contrib-group>
      <author-notes><corresp id="corr1">Yaxing Li (yxli2024@cdut.edu.cn) and Rui Su (surui@pjlab.org.cn)</corresp></author-notes><pub-date><day>21</day><month>April</month><year>2026</year></pub-date>
      
      <volume>18</volume>
      <issue>4</issue>
      <fpage>2769</fpage><lpage>2798</lpage>
      <history>
        <date date-type="received"><day>18</day><month>August</month><year>2025</year></date>
           <date date-type="rev-request"><day>5</day><month>November</month><year>2025</year></date>
           <date date-type="rev-recd"><day>16</day><month>March</month><year>2026</year></date>
           <date date-type="accepted"><day>10</day><month>April</month><year>2026</year></date>
      </history>
      <permissions>
        <copyright-statement>Copyright: © 2026 Feng Liu et al.</copyright-statement>
        <copyright-year>2026</copyright-year>
      <license license-type="open-access"><license-p>This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this licence, visit <ext-link ext-link-type="uri" xlink:href="https://creativecommons.org/licenses/by/4.0/">https://creativecommons.org/licenses/by/4.0/</ext-link></license-p></license></permissions><self-uri xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026.html">This article is available from https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026.html</self-uri><self-uri xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026.pdf">The full text article is available as a PDF file from https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026.pdf</self-uri>
      <abstract><title>Abstract</title>

      <p id="d2e205">Surface wave dispersion curve inversion plays a critical role in both shallow geophysical exploration and deep geological studies, yet it remains hindered by sensitivity to initial models, susceptibility to local minima, and low computational efficiency. Recently, data-driven deep learning methods, inspired by their success in computer vision and natural language processing, have shown promising potential to overcome these challenges. However, the lack of large-scale and diverse benchmark datasets remains a major obstacle to the development and evaluation of such methods. To address this gap, we introduce OpenSWI, a comprehensive benchmark dataset generated through the Surface Wave Inversion Dataset Preparation (SWIDP) pipeline. OpenSWI comprises two synthetic datasets tailored to different research scales and application scenarios, namely OpenSWI-shallow and OpenSWI-deep, as well as an AI-ready real-world dataset for generalization evaluation, OpenSWI-real. OpenSWI-shallow is derived from the 2-D geological model dataset OpenFWI, containing over 22 million 1-D velocity profiles paired with their fundamental-mode phase and group velocity dispersion curves, spanning a broad spectrum of shallow geological structures (e.g., flat layers, faults, folds, and realistic stratigraphy). OpenSWI-deep is built from 14 global and regional 3-D geological models, comprising approximately 1.26 million high-fidelity 1-D velocity-dispersion data pairs for deep earth studies. OpenSWI-real, compiled from open-source projects, contains two sets of observed dispersion curves and their corresponding 1-D reference models, serving as a benchmark for evaluating the generalization of deep learning models. To demonstrate the utility of OpenSWI, we trained deep learning models on OpenSWI-shallow and OpenSWI-deep, and evaluated them on OpenSWI-real. 
The results show strong agreement between the predicted and reference velocity models, confirming the diversity and representativeness of the OpenSWI dataset. To facilitate the advancement of intelligent surface wave dispersion curve inversion techniques, we release the OpenSWI dataset (<ext-link xlink:href="https://doi.org/10.5281/zenodo.16874111" ext-link-type="DOI">10.5281/zenodo.16874111</ext-link>, <xref ref-type="bibr" rid="bib1.bibx34" id="altparen.1"/>) and the SWIDP toolbox along with associated resources (<ext-link xlink:href="https://doi.org/10.5281/zenodo.16884901" ext-link-type="DOI">10.5281/zenodo.16884901</ext-link>, <xref ref-type="bibr" rid="bib1.bibx35" id="altparen.2"/>), providing open resources to support the research community.</p>
  </abstract>
    
<funding-group>
<award-group id="gs1">
<funding-source>National Natural Science Foundation of China</funding-source>
<award-id>42504129</award-id>
</award-group>
</funding-group>
</article-meta>
  </front>
<body>
      

<sec id="Ch1.S1" sec-type="intro">
  <label>1</label><title>Introduction</title>
      <p id="d2e229">Surface wave dispersion curve inversion is a fundamental geophysical technique for reconstructing subsurface shear wave velocity profiles by fitting theoretical dispersion curves to measured data <xref ref-type="bibr" rid="bib1.bibx61 bib1.bibx47 bib1.bibx59" id="paren.3"/>. It is widely applied in shallow engineering surveys, including site response and microzonation studies <xref ref-type="bibr" rid="bib1.bibx43 bib1.bibx52 bib1.bibx21" id="paren.4"/>, as well as in studies of lithospheric structure and evolution at greater depths <xref ref-type="bibr" rid="bib1.bibx48 bib1.bibx47 bib1.bibx67" id="paren.5"/>. In shallow subsurface investigations, this technique is valuable for identifying complex geological features such as weathering layers and overburden, while at greater depths, it provides critical insights into tectonic evolution <xref ref-type="bibr" rid="bib1.bibx45" id="paren.6"/>. Despite its widespread applicability, traditional inversion methods are heavily dependent on initial models and nonlinear optimization, leading to high computational costs and susceptibility to getting trapped in local minima <xref ref-type="bibr" rid="bib1.bibx48 bib1.bibx59 bib1.bibx9" id="paren.7"/>. These limitations hinder their applicability to large-scale, high-resolution imaging tasks.</p>
      <p id="d2e247">In recent years, rapidly developing deep learning methods have revolutionized the process of surface wave dispersion curve inversion. These data-driven approaches leverage deep neural networks, such as fully connected networks (FNNs), convolutional neural networks (CNNs), and Transformer networks, to learn the mapping between dispersion curves and subsurface shear wave velocity profiles <xref ref-type="bibr" rid="bib1.bibx28 bib1.bibx66 bib1.bibx55 bib1.bibx6 bib1.bibx29 bib1.bibx37 bib1.bibx30" id="paren.8"/>. By effectively eliminating reliance on initial models and iterative optimization, these methods significantly improve inversion efficiency and performance <xref ref-type="bibr" rid="bib1.bibx9" id="paren.9"/>. Once trained, the models can rapidly invert large-scale datasets in seconds, making them well-suited for real-time applications, such as field deployment and imaging. However, their performance and generalization ability are strongly influenced by both the quality and diversity of the training data <xref ref-type="bibr" rid="bib1.bibx39" id="paren.10"/>. Previous research has demonstrated that large-scale, diverse datasets substantially enhance deep model performance, particularly in scenarios with no labeled data (zero-shot learning) or limited labeled data requiring fine-tuning (few-shot learning) <xref ref-type="bibr" rid="bib1.bibx39 bib1.bibx37" id="paren.11"/>. Therefore, the development of dispersion curve datasets that encompass representative geological features, multi-scale structures, and sufficient sample sizes is crucial for advancing intelligent inversion methods.</p>
      <p id="d2e262">Despite the importance of diverse datasets for deep learning methods, the construction of benchmark datasets specifically for surface wave dispersion curve inversion remains limited. In contrast, other areas of seismic research have seen the successful creation of large-scale datasets. For instance, in seismic monitoring, datasets like STEAD <xref ref-type="bibr" rid="bib1.bibx42" id="paren.12"/> and INSTANCE <xref ref-type="bibr" rid="bib1.bibx41" id="paren.13"/> contain millions of waveform data traces. Similarly, full-waveform inversion efforts have led to the creation of model collections such as OpenFWI <xref ref-type="bibr" rid="bib1.bibx12" id="paren.14"/> and EFWI <xref ref-type="bibr" rid="bib1.bibx13" id="paren.15"/>, each comprising hundreds of thousands of geological velocity models. Seismic exploration and engineering have also benefited from the development of standardized workflows and open benchmark datasets, such as cigFacies <xref ref-type="bibr" rid="bib1.bibx24" id="paren.16"/>, cigChannels <xref ref-type="bibr" rid="bib1.bibx58" id="paren.17"/>, and the HEMEWS-3D database for large-scale ground motion simulations in heterogeneous geological environments <xref ref-type="bibr" rid="bib1.bibx32" id="paren.18"/>. However, in the specific domain of surface wave dispersion curve inversion, there is still a significant lack of representative, well-structured, and publicly accessible datasets. One of the main challenges lies in the necessity of paired dispersion curves and velocity profiles to generate high-quality training samples. Actual observational data are often proprietary and not available to most researchers <xref ref-type="bibr" rid="bib1.bibx40" id="paren.19"/>. Moreover, observed dispersion curves are often compromised by limitations in observation conditions and subjective picking, resulting in issues such as noise contamination and data gaps <xref ref-type="bibr" rid="bib1.bibx52 bib1.bibx2" id="paren.20"/>. 
Additionally, the non-uniqueness of the corresponding velocity profiles further complicates the development of supervised models <xref ref-type="bibr" rid="bib1.bibx20" id="paren.21"/>, making it more difficult to train deep learning algorithms effectively.</p>
      <p id="d2e296">To address these challenges, synthetic surface wave dispersion curve data have emerged as a feasible alternative. Synthetic data, generated through a series of forward modeling processes, can effectively simulate field-observed dispersion curves. Since the corresponding velocity profiles are known in the simulation, this method naturally avoids pairing errors. Deep neural networks trained on synthetic data have demonstrated good applicability and inversion performance in shallow subsurface geological exploration <xref ref-type="bibr" rid="bib1.bibx7 bib1.bibx1 bib1.bibx66 bib1.bibx65 bib1.bibx23" id="paren.22"/> and deep structural imaging <xref ref-type="bibr" rid="bib1.bibx28 bib1.bibx55 bib1.bibx29 bib1.bibx30 bib1.bibx37" id="paren.23"/>. However, existing publicly available datasets are still largely limited to specific geological features or particular regions, lacking sufficient geological diversity and regional coverage. Given the complexity of shallow geology and the regional variability of deep structures, constructing a synthetic dataset with greater geological complexity, broader coverage, and larger sample sizes is essential for improving the generalization ability and practical applicability of models.</p>
      <p id="d2e306">In this paper, we introduce OpenSWI, a comprehensive benchmark dataset designed for surface wave dispersion curve inversion, developed through the dataset construction workflow SWIDP (Fig. <xref ref-type="fig" rid="F1"/>). OpenSWI includes two synthetic benchmark datasets, OpenSWI-shallow and OpenSWI-deep, each tailored to different research scales and application scenarios, as well as an AI-ready real-world dataset, OpenSWI-real, specifically for evaluating model generalization. The OpenSWI-shallow dataset, built upon the publicly available 2-D geological model dataset OpenFWI, incorporates a broad range of geological features, such as flat layers, faults, folds, and actual geological structures, containing approximately 22 million 1-D velocity profiles paired with their corresponding fundamental-mode surface wave dispersion curves. This makes it the largest and most geologically diverse dataset available for shallow subsurface studies. To further enhance structural diversity and sample variability, SWIDP integrates a Denoising Diffusion Probabilistic Model (DDPM), which learns the distribution of 2-D geological models and allows the continuous generation of more varied shallow subsurface data. The OpenSWI-deep dataset, generated by collecting, curating, and integrating 14 global and regional 3-D geological models, consists of approximately 1.26 million high-fidelity 1-D dispersion data samples, providing a large-scale benchmark for deep subsurface imaging tasks. OpenSWI-real, derived from two publicly available observational datasets and their reference velocity models, is directly applicable for performance testing and generalization validation of deep learning models in real-world applications. To evaluate the practical utility of these datasets, we trained two Transformer-based models using OpenSWI-shallow and OpenSWI-deep, then validated them on OpenSWI-real. 
Experimental results show that the inversion results of the trained models on real-world data are highly consistent with reference models, confirming the effectiveness and representativeness of the OpenSWI datasets for real-world applications. All datasets, along with the associated toolchain (including profile extraction, forward modeling and training examples), have been fully open-sourced, offering a reusable, high-quality benchmark platform for advancing future research in intelligent surface wave dispersion curve inversion.</p>

      <fig id="F1" specific-use="star"><label>Figure 1</label><caption><p id="d2e313">Overview of the workflow for constructing the OpenSWI benchmark datasets and their application in deep learning-based surface wave dispersion curve inversion. The workflow starts with the collection and quality control of raw data, followed by the extraction and augmentation of 1-D velocity profiles, and the simulation of dispersion curves to generate two synthetic datasets, OpenSWI-shallow and OpenSWI-deep, tailored for different research scales and application scenarios (blue box). To evaluate the generalization capability of deep learning models, a real-world dataset, OpenSWI-real, is also curated (purple box). Finally, a simple deep learning model, trained on the benchmark datasets, is applied to real observational data, as depicted in the gray box on the right.</p></caption>
        <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f01.png"/>

      </fig>

</sec>
<sec id="Ch1.S2">
  <label>2</label><title>Construction of the Large-scale OpenSWI Benchmark Datasets</title>
<sec id="Ch1.S2.SS1">
  <label>2.1</label><title>Integrated Workflow for Dataset Construction</title>
      <p id="d2e337">We present an integrated workflow for constructing large-scale benchmark datasets for surface wave dispersion curve inversion. The workflow is designed to ensure geological diversity and realism of the data sources, employ modular and fully automated processing, and ensure high accuracy and computational efficiency in forward modeling. It encompasses all major stages, from the collection and standardization of raw geological models, through quality control and parameterization, to the simulation of fundamental-mode dispersion curves, providing a reproducible pipeline for large-scale dataset generation.</p>
<sec id="Ch1.S2.SS1.SSS1">
  <label>2.1.1</label><title>Collection and Quality Control of Geological Models</title>
      <p id="d2e348">The first step in constructing a high-quality dataset for dispersion curve inversion is the collection of representative velocity models from diverse geological settings. These velocity models were primarily obtained from open-access geological databases and previously published studies, such as OpenFWI datasets <xref ref-type="bibr" rid="bib1.bibx12" id="paren.24"/> – which contain 2-D geological models covering various sedimentary and tectonic settings – and LITHO1.0 geological models <xref ref-type="bibr" rid="bib1.bibx44" id="paren.25"/>, providing lithospheric-scale structural information. Table <xref ref-type="table" rid="T1"/> summarizes the original data sources employed in this study. These rigorously curated and geologically validated models form a reliable foundation for constructing the OpenSWI datasets.</p>

<table-wrap id="T1" specific-use="star"><label>Table 1</label><caption><p id="d2e362">Original data sources used in constructing the OpenSWI datasets, summarizing dataset categories (e.g., OpenSWI-shallow, OpenSWI-deep, and OpenSWI-real), references, primary geological settings (e.g., Flat, Flat-Fault, Fold, Fold-Fault, and Field) or geographic coverage (e.g., global, China, Europe, the United States), recorded velocity parameters (e.g., P-wave velocity <inline-formula><mml:math id="M1" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, S-wave velocity <inline-formula><mml:math id="M2" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, combined shear-horizontal velocity <inline-formula><mml:math id="M3" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and shear-vertical velocity <inline-formula><mml:math id="M4" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), as well as the size of the raw data, expressed as <inline-formula><mml:math id="M5" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula> velocity profiles <inline-formula><mml:math id="M6" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> <inline-formula><mml:math id="M7" display="inline"><mml:mi>M</mml:mi></mml:math></inline-formula> model variables <inline-formula><mml:math id="M8" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2-D velocity model shape (for OpenSWI-shallow) or <inline-formula><mml:math id="M9" display="inline"><mml:mi>L</mml:mi></mml:math></inline-formula> layers (for OpenSWI-deep and OpenSWI-real).</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="6">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="justify" colwidth="4cm"/>
     <oasis:colspec colnum="3" colname="col3" align="left"/>
     <oasis:colspec colnum="4" colname="col4" align="justify" colwidth="2.8cm"/>
     <oasis:colspec colnum="5" colname="col5" align="left"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:thead>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1">Group</oasis:entry>
         <oasis:entry colname="col2" align="left">Reference</oasis:entry>
         <oasis:entry colname="col3">Datasets</oasis:entry>
         <oasis:entry colname="col4" align="left">Geological Feature/Cover Region</oasis:entry>
         <oasis:entry colname="col5">Model Variable</oasis:entry>
         <oasis:entry colname="col6">Model Size</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-shallow</oasis:entry>
         <oasis:entry colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx12" id="text.26"/>
                        </oasis:entry>
         <oasis:entry colname="col3">OpenFWI-FlatVelA</oasis:entry>
         <oasis:entry colname="col4" align="left">Flat</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M10" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">30 000 <inline-formula><mml:math id="M11" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 1 <inline-formula><mml:math id="M12" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70 <inline-formula><mml:math id="M13" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2" align="left"/>
         <oasis:entry colname="col3">OpenFWI-Flat-FaultA</oasis:entry>
         <oasis:entry colname="col4" align="left">Flat <inline-formula><mml:math id="M14" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> Fault</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M15" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">54 000 <inline-formula><mml:math id="M16" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 1 <inline-formula><mml:math id="M17" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70 <inline-formula><mml:math id="M18" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2" align="left"/>
         <oasis:entry colname="col3">OpenFWI-CurveVel</oasis:entry>
         <oasis:entry colname="col4" align="left">Fold</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M19" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">30 000 <inline-formula><mml:math id="M20" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 1 <inline-formula><mml:math id="M21" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70 <inline-formula><mml:math id="M22" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2" align="left"/>
         <oasis:entry colname="col3">OpenFWI-Fold-Fault</oasis:entry>
         <oasis:entry colname="col4" align="left">Fold <inline-formula><mml:math id="M23" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> Fault</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M24" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">54 000 <inline-formula><mml:math id="M25" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 1 <inline-formula><mml:math id="M26" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70 <inline-formula><mml:math id="M27" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2" align="left"/>
         <oasis:entry colname="col3">OpenFWI-StyleA</oasis:entry>
         <oasis:entry colname="col4" align="left">Field</oasis:entry>
         <oasis:entry colname="col5"><inline-formula><mml:math id="M28" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">67 000 <inline-formula><mml:math id="M29" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 1 <inline-formula><mml:math id="M30" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70 <inline-formula><mml:math id="M31" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-deep</oasis:entry>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx44" id="text.27"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">LITHO1.0</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Global</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M32" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">40 962 <inline-formula><mml:math id="M33" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M34" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 96</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx64" id="text.28"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">USTClitho1.0</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">China</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M35" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">9125 <inline-formula><mml:math id="M36" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M37" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 12</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx49" id="text.29"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">Central-and-Western US</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">USA</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M38" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">6803 <inline-formula><mml:math id="M39" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M40" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 72</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx50" id="text.30"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">Continental China</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">China</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M41" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">4516 <inline-formula><mml:math id="M42" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M43" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 400</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx63" id="text.31"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">US Upper-Mantle</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">USA</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M44" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">3678 <inline-formula><mml:math id="M45" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M46" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 600</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx38" id="text.32"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">EUcrust</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Europe</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M47" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">43 520 <inline-formula><mml:math id="M48" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M49" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 80</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx3" id="text.33"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">Alaska</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Alaska</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M50" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">19 408 <inline-formula><mml:math id="M51" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M52" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 156</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left"><xref ref-type="bibr" rid="bib1.bibx11" id="text.34"/>,  <xref ref-type="bibr" rid="bib1.bibx4" id="text.35"/></oasis:entry>
         <oasis:entry rowsep="1" colname="col3">CSEM-Europe</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Europe</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M53" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M54" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">21 931 <inline-formula><mml:math id="M55" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M56" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 61</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx4" id="text.36"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">CSEM-Eastmed</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Eastern Mediterranean</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M57" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M58" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">12 782 <inline-formula><mml:math id="M59" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M60" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 81</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx14" id="text.37"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">CSEM-Iberian</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Western Mediterranean</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M61" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M62" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">9102 <inline-formula><mml:math id="M63" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M64" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 81</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx10" id="text.38"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">CSEM-South Atlantic</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">South Atlantic</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M65" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M66" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">7371 <inline-formula><mml:math id="M67" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M68" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 51</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left"><xref ref-type="bibr" rid="bib1.bibx46" id="text.39"/>,  <xref ref-type="bibr" rid="bib1.bibx31" id="text.40"/></oasis:entry>
         <oasis:entry rowsep="1" colname="col3">CSEM-North Atlantic</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">North Atlantic</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M69" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M70" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">14 541 <inline-formula><mml:math id="M71" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M72" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 51</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry rowsep="1" colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx51" id="text.41"/>
                        </oasis:entry>
         <oasis:entry rowsep="1" colname="col3">CSEM-Japan</oasis:entry>
         <oasis:entry rowsep="1" colname="col4" align="left">Japanese Islands</oasis:entry>
         <oasis:entry rowsep="1" colname="col5">depth, <inline-formula><mml:math id="M73" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M74" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry rowsep="1" colname="col6">14 641 <inline-formula><mml:math id="M75" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M76" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 61</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2" align="left"><xref ref-type="bibr" rid="bib1.bibx16" id="text.42"/>,  <xref ref-type="bibr" rid="bib1.bibx17" id="text.43"/></oasis:entry>
         <oasis:entry colname="col3">CSEM-Australasia</oasis:entry>
         <oasis:entry colname="col4" align="left">Australasia</oasis:entry>
         <oasis:entry colname="col5">depth, <inline-formula><mml:math id="M77" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M78" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">4131 <inline-formula><mml:math id="M79" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 3 <inline-formula><mml:math id="M80" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 51</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI real</oasis:entry>
         <oasis:entry colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx22" id="text.44"/>
                        </oasis:entry>
         <oasis:entry colname="col3">LongBeach</oasis:entry>
         <oasis:entry colname="col4" align="left">USA</oasis:entry>
         <oasis:entry colname="col5">depth, <inline-formula><mml:math id="M81" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">5297 <inline-formula><mml:math id="M82" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M83" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 241</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2" align="left">
                          <xref ref-type="bibr" rid="bib1.bibx62" id="text.45"/>
                        </oasis:entry>
         <oasis:entry colname="col3">CSRM</oasis:entry>
         <oasis:entry colname="col4" align="left">Continental China</oasis:entry>
         <oasis:entry colname="col5">depth, <inline-formula><mml:math id="M84" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula></oasis:entry>
         <oasis:entry colname="col6">12 901 <inline-formula><mml:math id="M85" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 2 <inline-formula><mml:math id="M86" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 145</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

      <p id="d2e1619">However, because the raw velocity models originated from different research groups and projects, they exhibited considerable variability in several aspects, such as data characteristics (e.g., depth range and spatial resolution), parameter types (e.g., S-wave velocity (<inline-formula><mml:math id="M87" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), P-wave velocity (<inline-formula><mml:math id="M88" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), or combined shear-wave velocities in both horizontal and vertical directions (<inline-formula><mml:math id="M89" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M90" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>)), and storage formats (e.g., <monospace>.npz</monospace>, <monospace>.txt</monospace>, or <monospace>.nc</monospace>). To ensure consistency and physical plausibility, a unified quality control and standardization procedure was applied before incorporating the models into the dataset. The quality control procedures included the following steps: <list list-type="order"><list-item>
      <p id="d2e1678"><italic>Data correction and artifact removal.</italic> Isolated numerical artifacts occasionally appeared during model assembly or interpolation, such as single-cell zero values, <monospace>NaN</monospace> values, or anomalous velocity spikes inconsistent with the surrounding velocity field. These artifacts were corrected using local interpolation or single-point replacement to restore numerical consistency. Importantly, the correction was restricted to isolated grid anomalies and did not modify spatially coherent geological structures.</p></list-item><list-item>
      <p id="d2e1687"><italic>Parameter conversion.</italic> For models that provided only <inline-formula><mml:math id="M91" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, the corresponding <inline-formula><mml:math id="M92" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> values were estimated using the empirical relationships proposed by <xref ref-type="bibr" rid="bib1.bibx5" id="text.46"/>. In cases where models included <inline-formula><mml:math id="M93" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sv</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M94" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">sh</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, an equivalent <inline-formula><mml:math id="M95" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> was derived as their geometric mean.</p></list-item><list-item>
      <p id="d2e1752"><italic>Plausibility verification.</italic> Geological structures within the models were systematically examined to remove anomalies inconsistent with geological principles or unsuitable for forward modeling.</p></list-item></list></p>
      <p id="d2e1758">These quality control measures substantially improved the accuracy and applicability of the geological models, thereby providing a robust and standardized data foundation for dispersion curve forward modeling and subsequent machine learning model training.</p>
</sec>
<sec id="Ch1.S2.SS1.SSS2">
  <label>2.1.2</label><title>Extraction and Parameterization of 1-D Velocity Profiles</title>
      <p id="d2e1770">After completing the quality control and standardization of the geological models, the next step was to construct 1-D velocity profiles suitable for forward modeling. As illustrated in Fig. <xref ref-type="fig" rid="F2"/>, this process involved multiple stages, including profile extraction from 2-D or 3-D geological models, removal of redundant samples, structural rationalization, and parameter completion.</p>

      <fig id="F2" specific-use="star"><label>Figure 2</label><caption><p id="d2e1777">Workflow for extracting and parameterizing 1-D velocity profiles. The upper row shows the process for OpenSWI-shallow, derived from multiple 2-D geological cross-sections, while the lower row illustrates the process for OpenSWI-deep, based on curated 3-D geological models. The workflow includes profile extraction, de-duplication, structure refinement, interpolation, standardization, and parameter conversion to generate depth, <inline-formula><mml:math id="M96" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> (blue), <inline-formula><mml:math id="M97" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> (red), and <inline-formula><mml:math id="M98" display="inline"><mml:mi mathvariant="italic">ρ</mml:mi></mml:math></inline-formula> (green) for forward modeling of surface wave dispersion curves.</p></caption>
            <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f02.jpg"/>

          </fig>

      <p id="d2e1815">Each 1-D profile contains key physical parameters extending from the surface to the target depth range, including depth, S-wave velocity (<inline-formula><mml:math id="M99" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), P-wave velocity (<inline-formula><mml:math id="M100" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), and density (<inline-formula><mml:math id="M101" display="inline"><mml:mi mathvariant="italic">ρ</mml:mi></mml:math></inline-formula>). The procedure is described as follows: <list list-type="order"><list-item>
      <p id="d2e1849"><italic>Extraction and de-duplication of 1-D profiles</italic>. Vertical 1-D <inline-formula><mml:math id="M102" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> profiles were extracted from 2-D geological cross-sections and 3-D geological models at surface grid points. In models with horizontally layered structures, adjacent grid points may yield identical vertical profiles. To reduce redundancy, the spatial sampling interval was controlled during extraction so that only representative profiles were retained. In addition, a similarity check was applied to the extracted profiles. Profile similarity was quantified using the structural similarity index (SSIM), and profiles exceeding a predefined similarity threshold were considered duplicates, with only one representative profile retained.</p></list-item><list-item>
      <p id="d2e1866"><italic>Structure refinement of 1-D profiles</italic>. To improve numerical stability during forward modeling, extremely thin layers and isolated velocity spikes that may arise during model extraction or interpolation were removed or merged with adjacent layers. Such layers are typically below the effective vertical resolution of surface-wave dispersion curves and may introduce unrealistic oscillations in the calculated dispersion relations. This refinement step therefore removes only sub-resolution numerical anomalies while preserving the overall stratigraphic structure of the velocity profiles.</p></list-item><list-item>
      <p id="d2e1872"><italic>Interpolation and standardization</italic>. Uniform layer-thickness interpolation was applied to ensure model consistency across different application scenarios. For shallow subsurface models, layers were resampled at 40 m intervals, whereas for deep-Earth models, a coarser 1 km interval was adopted. This standardization facilitated large-scale batch processing and streamlined integration with deep learning frameworks. We note, however, that some studies may prefer non-uniform layer-thickness schemes (e.g., finer resolution in the shallow part and coarser resolution at greater depths). To support such flexibility, users can easily regenerate alternative dataset versions using the original construction scripts we provide.</p></list-item><list-item>
      <p id="d2e1879"><italic>Completion of other physical parameters</italic>. To construct complete elastic models, <inline-formula><mml:math id="M103" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M104" display="inline"><mml:mi mathvariant="italic">ρ</mml:mi></mml:math></inline-formula> were derived from the known <inline-formula><mml:math id="M105" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> profiles to ensure physical consistency. For depths shallower than 120 km, empirical relationships from <xref ref-type="bibr" rid="bib1.bibx5" id="text.47"/> were applied to compute <inline-formula><mml:math id="M106" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M107" display="inline"><mml:mi mathvariant="italic">ρ</mml:mi></mml:math></inline-formula>, which are well calibrated for crustal lithologies. 
For depths greater than or equal to 120 km, where these empirical formulas are less reliable, a representative upper-mantle <inline-formula><mml:math id="M108" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub><mml:mo>/</mml:mo><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> ratio of 1.79 was used to compute <inline-formula><mml:math id="M109" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> from <inline-formula><mml:math id="M110" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, and the density was subsequently estimated from <inline-formula><mml:math id="M111" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> using Brocher's empirical relationship to maintain a physically consistent density–velocity relationship. To avoid potential artificial discontinuities at the transition depth, a local smoothing procedure was applied to the derived parameters in the vicinity of 120 km, ensuring smooth and physically reasonable vertical variations.</p></list-item></list></p>
      <p id="d2e1987">Through these steps, we generated a comprehensive collection of 1-D velocity profiles characterized by geological diversity, physical consistency, and numerical stability.</p>
</sec>
<sec id="Ch1.S2.SS1.SSS3">
  <label>2.1.3</label><title>Augmentation of Velocity Models for Geological Diversity</title>
      <p id="d2e1998">Although the 1-D velocity profiles extracted and transformed from the aforementioned 2-D and 3-D geological models already surpass those used in previous studies in both quantity and diversity, they still cannot fully capture the complete range of geological types and their characteristic variations. To further broaden the dataset’s representativeness and establish a scalable data construction workflow, we designed and implemented multiple data augmentation strategies based on the original 2-D geological profiles and the processed 1-D velocity models, as outlined below:</p>
      <p id="d2e2002"><list list-type="order">
              <list-item>

      <p id="d2e2007"><italic>Perturbation-based augmentation of shallow 1-D velocity profiles.</italic> For near-surface geological models, controlled perturbations were applied to both velocities and layer thicknesses while preserving the overall layer structure, thereby enhancing variability across different geological scenarios. The procedure includes: (1) extracting the primary layers from the 1-D profiles; (2) applying constrained perturbations to the velocity and thickness of each layer within predefined ranges to generate structurally consistent variations <xref ref-type="bibr" rid="bib1.bibx39 bib1.bibx29 bib1.bibx37" id="paren.48"/>; and (3) performing structural plausibility checks on the perturbed profiles, followed by interpolation and parameter conversion as detailed in Sect. <xref ref-type="sec" rid="Ch1.S2.SS1.SSS2"/> to ensure physical and numerical validity. The top row of Fig. <xref ref-type="fig" rid="F3"/> illustrates the augmentation workflow and the resulting variations for a representative 1-D profile.</p>
              </list-item>
              <list-item>

      <p id="d2e2022"><italic>Feature-aware augmentation of deep 1-D velocity profiles.</italic> For deep geological structures characterized by distinct geophysical interfaces (e.g., the Moho discontinuity), we implemented a feature-aware perturbation strategy to improve model sensitivity to key geological boundaries. The procedure involves: (1) identifying the Moho interface in each 1-D profile; (2) fitting the crustal and mantle layers above and below the interface with cubic spline functions, where the number of spline nodes is randomly selected from the ranges 3–6 and 8–12, respectively; and (3) applying constrained perturbations to the velocity values at the spline nodes, followed by curve smoothing and re-interpolation to generate new deep velocity profiles. The bottom row of Fig. <xref ref-type="fig" rid="F3"/> illustrates the complete workflow and resulting variations for a representative 1-D profile.</p>
              </list-item>
              <list-item>

      <p id="d2e2032"><italic>Generative-model-based augmentation of 2-D geological models.</italic> To further enrich geological feature diversity and enable scalable dataset expansion tailored to user needs, we employed deep generative techniques, such as denoising diffusion probabilistic models (DDPMs, <xref ref-type="bibr" rid="bib1.bibx27" id="altparen.49"/>), using the 2-D geological cross-section data collected in Sect. <xref ref-type="sec" rid="Ch1.S2.SS1.SSS1"/>. These models learn spatial feature distributions and synthesize additional 2-D geological models with improved geological consistency and structural diversity. This component of the workflow is described in greater detail in the subsequent section on shallow-subsurface dataset construction.</p>
              </list-item>
            </list></p>
      <p id="d2e2044">These augmentation strategies substantially enriched the dataset in terms of geological types, structural complexity, and the representation of key features. As a result, they provide more diverse and comprehensive training samples for deep learning models, thereby improving generalization and robustness when applied to complex geological settings.</p>

      <fig id="F3" specific-use="star"><label>Figure 3</label><caption><p id="d2e2050">Illustration of data augmentation and forward simulation examples. The top row shows perturbation-based augmentation applied to OpenSWI-shallow data, which increases variability in shallow 1-D velocity profiles. The bottom row shows feature-aware perturbation applied to OpenSWI-deep data, focusing on key structural features such as the Moho discontinuity. Thick lines represent the original 1-D profiles and their corresponding dispersion curves, while thin lines represent the augmented profiles and dispersion curves.</p></caption>
            <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f03.jpg"/>

          </fig>


</sec>
<sec id="Ch1.S2.SS1.SSS4">
  <label>2.1.4</label><title>Forward Modeling of Surface-wave Dispersion Curves</title>
      <p id="d2e2070">Based on the constructed 1-D velocity profiles, we employed efficient geophysical forward modeling tools to generate the corresponding surface-wave dispersion curves. Forward modeling is a critical step in dataset construction, ensuring that the simulated dispersion curves faithfully capture the propagation characteristics of surface waves in different subsurface media. The workflow comprises three main components:</p>
      <p id="d2e2073"><list list-type="bullet">
              <list-item>

      <p id="d2e2078"><italic>Defining the period range of dispersion curves.</italic> In practice, the period range and sampling points of observed dispersion curves vary considerably. To enhance the diversity and applicability of the dataset, we designed a hybrid sampling strategy for constructing the period axis. This strategy integrates uniform, random, and logarithmic sampling, with increased sampling density in the high-frequency range <xref ref-type="bibr" rid="bib1.bibx57 bib1.bibx37" id="paren.50"/>. Such a design ensures broad coverage of surface-wave responses across different period bands, improving both the representativeness and utility of the simulated data.</p>
              </list-item>
              <list-item>

      <p id="d2e2089"><italic>Forward computation of dispersion curves.</italic> The forward modeling of surface wave dispersion curves fundamentally involves numerically solving the dispersion equation across a range of frequencies (<inline-formula><mml:math id="M112" display="inline"><mml:mi>f</mml:mi></mml:math></inline-formula>), where frequency is defined as the reciprocal of the period <inline-formula><mml:math id="M113" display="inline"><mml:mi>T</mml:mi></mml:math></inline-formula> (i.e., <inline-formula><mml:math id="M114" display="inline"><mml:mrow><mml:mi>f</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn><mml:mo>/</mml:mo><mml:mi>T</mml:mi></mml:mrow></mml:math></inline-formula>), to determine the corresponding phase velocity <inline-formula><mml:math id="M115" display="inline"><mml:mi>c</mml:mi></mml:math></inline-formula> for each mode <xref ref-type="bibr" rid="bib1.bibx54 bib1.bibx25 bib1.bibx36" id="paren.51"/>. This process can be formulated as a root-finding problem for the dispersion function <inline-formula><mml:math id="M116" display="inline"><mml:mi>D</mml:mi></mml:math></inline-formula>:

                    <disp-formula id="Ch1.E1" content-type="numbered"><label>1</label><mml:math id="M117" display="block"><mml:mrow><mml:mi>D</mml:mi><mml:mo>(</mml:mo><mml:mi>c</mml:mi><mml:mo>,</mml:mo><mml:mi>f</mml:mi><mml:mo>,</mml:mo><mml:mi mathvariant="bold">m</mml:mi><mml:mo>)</mml:mo><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0</mml:mn><mml:mo>,</mml:mo></mml:mrow></mml:math></disp-formula>

                  where <inline-formula><mml:math id="M118" display="inline"><mml:mi mathvariant="bold">m</mml:mi></mml:math></inline-formula> denotes the elastic parameters of the layered medium, and <inline-formula><mml:math id="M119" display="inline"><mml:mi>D</mml:mi></mml:math></inline-formula> encapsulates the frequency-dependent behavior of wave propagation in this structure. Solving Eq. (1) for each frequency yields the phase velocity dispersion curve <inline-formula><mml:math id="M120" display="inline"><mml:mrow><mml:mi>c</mml:mi><mml:mo>(</mml:mo><mml:mi>f</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:math></inline-formula> (also denoted as <inline-formula><mml:math id="M121" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">phase</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>), which characterizes the propagation speed of each harmonic component of the wavefield.</p>

      <p id="d2e2209">In addition to phase velocity, the group velocity <inline-formula><mml:math id="M122" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">group</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, which describes the propagation speed of wave packets, is a critical quantity for surface wave analysis. It is obtained as the derivative of angular frequency <inline-formula><mml:math id="M123" display="inline"><mml:mrow><mml:mi mathvariant="italic">ω</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">2</mml:mn><mml:mi mathvariant="italic">π</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:math></inline-formula> with respect to wavenumber <inline-formula><mml:math id="M124" display="inline"><mml:mi>k</mml:mi></mml:math></inline-formula>, and can be expressed in terms of the phase velocity as:

                    <disp-formula id="Ch1.E2" content-type="numbered"><label>2</label><mml:math id="M125" display="block"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">group</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi mathvariant="italic">ω</mml:mi></mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi>k</mml:mi></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>=</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi>c</mml:mi><mml:mo>(</mml:mo><mml:mi>f</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>-</mml:mo><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mi>f</mml:mi><mml:mrow><mml:mi>c</mml:mi><mml:mo>(</mml:mo><mml:mi>f</mml:mi><mml:mo>)</mml:mo></mml:mrow></mml:mfrac></mml:mstyle><mml:mstyle displaystyle="true"><mml:mfrac style="display"><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi>c</mml:mi><mml:mo>(</mml:mo><mml:mi>f</mml:mi><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mi mathvariant="normal">d</mml:mi><mml:mi>f</mml:mi></mml:mrow></mml:mfrac></mml:mstyle></mml:mrow></mml:mfrac></mml:mstyle><mml:mo>.</mml:mo></mml:mrow></mml:math></disp-formula>

                  The group velocity curve complements the phase velocity curve by offering additional sensitivity to subsurface structure and is especially useful in tomographic and inversion applications where energy transport characteristics are of interest.</p>

      <p id="d2e2306">For each 1-D velocity model, we used the Python library <monospace>Disba</monospace> (<uri>https://keurfonluu.github.io/disba</uri>, last access: 18 April 2026), adapted from the classical seismological software package <monospace>Computer Programs in Seismology (CPS)</monospace> <xref ref-type="bibr" rid="bib1.bibx26" id="paren.52"/>, to compute the dispersion curves. This tool efficiently calculates the fundamental-mode phase-velocity and group-velocity characteristics of Rayleigh waves and outputs complete period–velocity pairs (period, phase velocity, and group velocity) for each velocity model, ensuring comprehensive information for inversion tasks.</p>
              </list-item>
              <list-item>

      <p id="d2e2324"><italic>Parallelization and computational acceleration.</italic> Given the large scale of the dataset, we implemented multi-process parallelization and matrix-based batch processing to significantly improve computational efficiency. These optimizations enabled the simulation of hundreds of thousands to millions of dispersion curves within a practical timeframe, meeting the data requirements of deep learning applications.</p>
              </list-item>
            </list></p>
      <p id="d2e2331">This workflow produced a large-scale, quality-controlled dataset of surface-wave dispersion curves. Figure <xref ref-type="fig" rid="F3"/> showcases examples of dispersion curves from the OpenSWI-shallow and OpenSWI-deep datasets. These simulated data provide a solid foundation for training deep learning–based inversion models, facilitating applications in resource exploration and imaging of Earth’s internal structure.</p>

<table-wrap id="T2" specific-use="star"><label>Table 2</label><caption><p id="d2e2340">Comprehensive summary of the OpenSWI dataset, describing its categories (OpenSWI-shallow, OpenSWI-deep, and OpenSWI-real), associated period ranges (seconds, s), depth ranges (kilometers, km), and sampling intervals (kilometers, km), as well as the extracted and augmented 1-D velocity profiles (depth, <inline-formula><mml:math id="M126" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M127" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M128" display="inline"><mml:mi mathvariant="italic">ρ</mml:mi></mml:math></inline-formula>), expressed as <inline-formula><mml:math id="M129" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula> profiles <inline-formula><mml:math id="M130" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> <inline-formula><mml:math id="M131" display="inline"><mml:mi>M</mml:mi></mml:math></inline-formula> model variables <inline-formula><mml:math id="M132" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> <inline-formula><mml:math id="M133" display="inline"><mml:mi>L</mml:mi></mml:math></inline-formula> layers.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="6">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="left"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1">Group</oasis:entry>
         <oasis:entry colname="col2">Datasets</oasis:entry>
         <oasis:entry colname="col3">Period</oasis:entry>
         <oasis:entry colname="col4">Depth Range (km)/</oasis:entry>
         <oasis:entry colname="col5">Extracted 1-D</oasis:entry>
         <oasis:entry colname="col6">Augmented 1-D</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2"/>
         <oasis:entry colname="col3">Range (s)</oasis:entry>
         <oasis:entry colname="col4">Depth Interval (km)</oasis:entry>
         <oasis:entry colname="col5">Velocity Profiles</oasis:entry>
         <oasis:entry colname="col6">Velocity Profiles</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-shallow</oasis:entry>
         <oasis:entry colname="col2">Flat</oasis:entry>
         <oasis:entry colname="col3">0.2–10</oasis:entry>
         <oasis:entry colname="col4">0–2.8/0.04</oasis:entry>
         <oasis:entry colname="col5">29 379 <inline-formula><mml:math id="M134" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M135" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
         <oasis:entry colname="col6">1 490 415 <inline-formula><mml:math id="M136" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M137" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Flat <inline-formula><mml:math id="M138" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> Fault</oasis:entry>
         <oasis:entry colname="col3">0.2–10</oasis:entry>
         <oasis:entry colname="col4">0–2.8/0.04</oasis:entry>
         <oasis:entry colname="col5">292 933 <inline-formula><mml:math id="M139" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M140" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
         <oasis:entry colname="col6">2 925 151 <inline-formula><mml:math id="M141" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M142" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Fold</oasis:entry>
         <oasis:entry colname="col3">0.2–10</oasis:entry>
         <oasis:entry colname="col4">0–2.8/0.04</oasis:entry>
         <oasis:entry colname="col5">295 751 <inline-formula><mml:math id="M143" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M144" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
         <oasis:entry colname="col6">2 952 975 <inline-formula><mml:math id="M145" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M146" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Fold <inline-formula><mml:math id="M147" display="inline"><mml:mo>+</mml:mo></mml:math></inline-formula> Fault</oasis:entry>
         <oasis:entry colname="col3">0.2–10</oasis:entry>
         <oasis:entry colname="col4">0–2.8/0.04</oasis:entry>
         <oasis:entry colname="col5">537 751 <inline-formula><mml:math id="M148" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M149" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
         <oasis:entry colname="col6">5 369 692 <inline-formula><mml:math id="M150" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M151" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Field</oasis:entry>
         <oasis:entry colname="col3">0.2–10</oasis:entry>
         <oasis:entry colname="col4">0–2.8/0.04</oasis:entry>
         <oasis:entry colname="col5">2 338 248 <inline-formula><mml:math id="M152" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M153" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
         <oasis:entry colname="col6">9 345 103 <inline-formula><mml:math id="M154" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M155" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">All</oasis:entry>
         <oasis:entry colname="col3">0.2–10</oasis:entry>
         <oasis:entry colname="col4">0–2.8/0.04</oasis:entry>
         <oasis:entry colname="col5">3 494 062 <inline-formula><mml:math id="M156" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M157" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
         <oasis:entry colname="col6">22 083 336 <inline-formula><mml:math id="M158" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M159" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-deep</oasis:entry>
         <oasis:entry colname="col2">LITHO1.0</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">40 959 <inline-formula><mml:math id="M160" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M161" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">245 771 <inline-formula><mml:math id="M162" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M163" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">USTClitho1.0</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">9125 <inline-formula><mml:math id="M164" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M165" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">54 750 <inline-formula><mml:math id="M166" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M167" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Central-and-Western US</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">6803 <inline-formula><mml:math id="M168" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M169" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">40 818 <inline-formula><mml:math id="M170" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M171" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Continental China</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">4516 <inline-formula><mml:math id="M172" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M173" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">27 096 <inline-formula><mml:math id="M174" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M175" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">US Upper-Mantle</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">3678 <inline-formula><mml:math id="M176" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M177" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">22 061 <inline-formula><mml:math id="M178" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M179" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">EUcrust</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">43 520 <inline-formula><mml:math id="M180" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M181" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">261 155 <inline-formula><mml:math id="M182" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M183" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Alaska</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">19 408 <inline-formula><mml:math id="M184" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M185" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">116 448 <inline-formula><mml:math id="M186" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M187" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-Europe</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">21 931 <inline-formula><mml:math id="M188" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M189" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">131 586 <inline-formula><mml:math id="M190" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M191" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-Eastmed</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">12 782 <inline-formula><mml:math id="M192" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M193" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">76 692 <inline-formula><mml:math id="M194" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M195" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-Iberian</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">9102 <inline-formula><mml:math id="M196" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M197" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">54 612 <inline-formula><mml:math id="M198" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M199" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-South Atlantic</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">7371 <inline-formula><mml:math id="M200" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M201" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">44 226 <inline-formula><mml:math id="M202" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M203" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-North Atlantic</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">14 541 <inline-formula><mml:math id="M204" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M205" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">87 246 <inline-formula><mml:math id="M206" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M207" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-Japan</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">14 641 <inline-formula><mml:math id="M208" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M209" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">87 846 <inline-formula><mml:math id="M210" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M211" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSEM-Australasia</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">4131 <inline-formula><mml:math id="M212" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M213" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">24 786 <inline-formula><mml:math id="M214" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M215" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">All</oasis:entry>
         <oasis:entry colname="col3">1–100</oasis:entry>
         <oasis:entry colname="col4">0–300/1.0</oasis:entry>
         <oasis:entry colname="col5">212 508 <inline-formula><mml:math id="M216" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M217" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col6">1 275 093 <inline-formula><mml:math id="M218" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M219" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-real</oasis:entry>
         <oasis:entry colname="col2">LongBeach</oasis:entry>
         <oasis:entry colname="col3">0.263–1.666</oasis:entry>
         <oasis:entry colname="col4">0–1.4/0.04</oasis:entry>
         <oasis:entry colname="col5">5297 <inline-formula><mml:math id="M220" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M221" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 35</oasis:entry>
         <oasis:entry colname="col6">–</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">CSRM</oasis:entry>
         <oasis:entry colname="col3">8–70</oasis:entry>
         <oasis:entry colname="col4">0–120/1.0</oasis:entry>
         <oasis:entry colname="col5">12 901 <inline-formula><mml:math id="M222" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 4 <inline-formula><mml:math id="M223" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 120</oasis:entry>
         <oasis:entry colname="col6">–</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
<sec id="Ch1.S2.SS1.SSS5">
  <label>2.1.5</label><title>Open-source Implementation</title>
      <p id="d2e3601">To promote reproducibility, scalability, and community engagement, we developed a standardized Python toolkit named <monospace>SWIDP</monospace> (Surface Wave Inversion Dataset Preparation pipeline). Built upon the key procedures described in Sect. <xref ref-type="sec" rid="Ch1.S2.SS1.SSS1"/>–<xref ref-type="sec" rid="Ch1.S2.SS1.SSS4"/>, <monospace>SWIDP</monospace> encapsulates core functionalities such as the extraction and parameterization of 1-D velocity profiles, data augmentation, and large-scale dispersion curve simulation.</p>
      <p id="d2e3614">By automating these processes, it enhances the efficiency, transparency, and consistency of dataset preparation. Designed with a modular architecture, <monospace>SWIDP</monospace> allows users to flexibly reuse or extend specific components, facilitating seamless adaptation to diverse research needs. Example codes are provided in Appendix <xref ref-type="sec" rid="App1.Ch1.S1"/> and <xref ref-type="sec" rid="App1.Ch1.S2"/>. The full source code is openly available at <ext-link xlink:href="https://doi.org/10.5281/zenodo.16884901" ext-link-type="DOI">10.5281/zenodo.16884901</ext-link> <xref ref-type="bibr" rid="bib1.bibx35" id="paren.53"/> and <uri>https://github.com/liufeng2317/OpenSWI</uri> (last access: 18 April 2026), enabling both academic and industrial users to adopt and further develop the toolkit.</p>
</sec>
</sec>
<sec id="Ch1.S2.SS2">
  <label>2.2</label><title>OpenSWI-shallow: Large-scale Benchmark for Complex Shallow Geology</title>
<sec id="Ch1.S2.SS2.SSS1">
  <label>2.2.1</label><title>Building Geological Model Foundations from OpenFWI</title>
      <p id="d2e3650">To establish a representative benchmark dataset for shallow-subsurface surface-wave dispersion curve inversion, we constructed a comprehensive collection of 2-D velocity models with diverse geological structures derived from the OpenFWI dataset <xref ref-type="bibr" rid="bib1.bibx12" id="paren.54"/>. These models encompass five primary geological categories: flat layers (Flat), flat layers with faults (Flat–Fault), folded layers (Fold), folded layers with faults (Fold–Fault), and field-style models (Field) inspired by realistic observations. The five categories contain approximately 30 000, 54 000, 30 000, 54 000, and 67 000 samples, respectively. All models share a grid resolution of <inline-formula><mml:math id="M224" display="inline"><mml:mrow><mml:mn mathvariant="normal">70</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">70</mml:mn></mml:mrow></mml:math></inline-formula> with a spatial sampling interval of 40 m, ensuring sufficient detail to capture the complexity and variability of shallow-subsurface geological features.</p>
      <p id="d2e3668">Based on these 2-D models, we systematically extracted a large number of 1-D velocity profiles according to the geological characteristics of each geological category. To enhance the dataset’s diversity and coverage, each original 1-D profile was augmented 4 to 10 times by independently applying perturbations of up to 10 % in layer thickness and 5 % in velocity. Following these perturbations, plausibility checks and interpolation adjustments were performed to ensure physical consistency and numerical stability. The final dataset comprises over 22 million 1-D velocity models spanning all geological categories. Detailed statistics of both the extracted and augmented profile counts for each category are summarized in Table <xref ref-type="table" rid="T2"/>.</p>
      <p id="d2e3673">Forward modeling of fundamental-mode Rayleigh-wave dispersion curves was then conducted for all 1-D models. Given that the maximum depth of these profiles is approximately 2.8 km, the simulated period range was defined from 0.2 to 10 s, with 100 period points sampled per curve. To improve period coverage and model generalization capability, the sampling points were selected using a hybrid strategy combining uniform, random, and logarithmic sampling, contributing 50, 30, and 20 points, respectively. Each dispersion curve includes period, phase-velocity, and group-velocity information, serving as training and validation data for subsequent deep learning applications.</p>
      <p id="d2e3676">Figures <xref ref-type="fig" rid="F4"/> and <xref ref-type="fig" rid="F5"/> showcase the representativeness and statistical properties of the OpenSWI-shallow dataset. Figure <xref ref-type="fig" rid="F4"/> illustrates the diverse geological scenarios covered by the dataset through representative 2-D velocity models, systematically extracted 1-D profiles, and their augmented variants, together with the corresponding phase and group velocity dispersion curves. Figure <xref ref-type="fig" rid="F5"/> further summarizes the large-scale statistical distributions of profiles and dispersion characteristics across all geological types, highlighting the dataset’s substantial improvements in structural diversity, distributional coverage, and suitability for data-driven surface wave inversion studies.</p>

      <fig id="F4" specific-use="star"><label>Figure 4</label><caption><p id="d2e3690">Representative samples from the OpenSWI-shallow dataset. The top two rows present original 2-D velocity models for five geological types: Flat, Flat–Fault, Fold, Fold–Fault, and Field. The middle two rows show the corresponding extracted 1-D velocity profiles (bold black lines) and their augmented variants (thin colored lines). The bottom two rows display the simulated Rayleigh-wave dispersion curves, with phase velocities shown in pink and group velocities in blue.</p></caption>
            <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f04.jpg"/>

          </fig>

      <fig id="F5" specific-use="star"><label>Figure 5</label><caption><p id="d2e3701">Statistical characteristics of the OpenSWI-shallow dataset. Distribution of 1-D velocity profiles and corresponding dispersion curves for each geological style: <bold>(a)</bold> Flat, <bold>(b)</bold> Flat–Fault, <bold>(c)</bold> Fold, <bold>(d)</bold> Fold–Fault, <bold>(e)</bold> Field. The black lines represent the mean, and the shaded regions indicate the <inline-formula><mml:math id="M225" display="inline"><mml:mo>±</mml:mo></mml:math></inline-formula>1 standard deviation range. Panel <bold>(f)</bold> summarizes the mean and variance across the five geological subsets.</p></caption>
            <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f05.jpg"/>

          </fig>

</sec>
<sec id="Ch1.S2.SS2.SSS2">
  <label>2.2.2</label><title>Optional Dataset Expansion with DDPM</title>
      <p id="d2e3744">Although the proposed OpenSWI-shallow dataset constructed from OpenFWI substantially improves geological structural diversity compared with existing dispersion curve datasets, it cannot fully cover the complete range of velocity structures observed in real subsurface settings. To provide a scalable pathway for further dataset expansion, we optionally incorporated a deep generative module based on the Denoising Diffusion Probabilistic Model (DDPM), specifically designed for the shallow subsurface within the 0–3 km depth range.</p>
      <p id="d2e3747">This module uses 2-D velocity models from OpenFWI as training data to develop multiple DDPMs, which learn the distributional characteristics of different geological structures. Starting from Gaussian noise, the DDPMs iteratively generate velocity models with realistic structural features, consistently reproducing faults, folds, and complex sedimentary units. Compared with traditional manual or perturbed augmentation, the DDPM-generated data provide clear advantages in structural continuity, geological realism, and controllable scalability, significantly expanding the foundational velocity model library <xref ref-type="bibr" rid="bib1.bibx27 bib1.bibx56 bib1.bibx53" id="paren.55"/>. Details of the DDPM design and training are provided in Appendix <xref ref-type="sec" rid="App1.Ch1.S3"/>, and the code has been publicly released with the SWIDP pipeline for reproducibility.</p>
      <p id="d2e3755">Figure <xref ref-type="fig" rid="F6"/> illustrates the continual expansion of the OpenSWI-shallow dataset using the DDPM module. The diffusion model progressively transforms Gaussian noise into geologically realistic 2-D velocity models through a 1000-step denoising process, from which representative 1-D profiles are extracted and used to simulate Rayleigh-wave dispersion curves. This diffusion-based augmentation strategy substantially enriches the structural diversity and spatial coverage of the dataset, thereby improving the generalization capability of deep learning models.</p>

      <fig id="F6" specific-use="star"><label>Figure 6</label><caption><p id="d2e3763">Continual expansion of the OpenSWI-shallow dataset using a diffusion-based generative module. The left panel illustrates a 1000-step denoising trajectory, where Gaussian noise is progressively transformed into 2-D velocity models with realistic geological structures. The right panel presents representative 1-D velocity profiles extracted from the generated models, along with their corresponding Rayleigh-wave dispersion curves simulated using the SWIDP pipeline.</p></caption>
            <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f06.jpg"/>

          </fig>

</sec>
</sec>
<sec id="Ch1.S2.SS3">
  <label>2.3</label><title>OpenSWI-deep: Global Coverage Benchmark for Deep Earth Imaging</title>
      <p id="d2e3781">Building upon the shallow-subsurface benchmark dataset introduced in Sect. <xref ref-type="sec" rid="Ch1.S2.SS2"/>, we further extended the OpenSWI framework to deeper Earth structures. However, for the deeper Earth structure, systematic datasets of regular velocity models remain largely unavailable. To address this gap, we compiled a collection of representative 3-D velocity models from published literature and geophysical studies. This collection includes one global-scale model and 13 high-resolution regional models, each constructed using different methodologies and data sources to maximize geological representativeness and geophysical applicability. Figure <xref ref-type="fig" rid="F7"/> shows the spatial distribution of these 14 models with horizontal slices at a depth of 60 km.</p>

      <fig id="F7" specific-use="star"><label>Figure 7</label><caption><p id="d2e3790">Spatial distribution of the 14 velocity models compiled for the OpenSWI-deep dataset. The collection includes one global-scale model and 13 high-resolution regional models obtained from published literature and geophysical studies. Horizontal slices at a depth of 60 km are shown to illustrate their geographic coverage and tectonic diversity. The gray dots in the central global map indicate the sampling locations of the LITHO1.0 dataset used to extract the structural parameters.</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f07.jpg"/>

        </fig>

      <p id="d2e3799">Among them, LITHO1.0 provides global information on the crust and upper mantle, encompassing sedimentary layers, crust, lithosphere, and asthenosphere, at a spatial resolution of 1° <xref ref-type="bibr" rid="bib1.bibx44" id="paren.56"/>. This model is widely used in seismic tomography and as a reference Earth model. USTClitho1.0, derived from double-difference tomography using seismic data from the Chinese National Seismic Network, resolves crustal and upper mantle structures down to 150 km depth at a horizontal resolution of 0.5°, supporting studies of regional deep structures <xref ref-type="bibr" rid="bib1.bibx64" id="paren.57"/>. The Central and Western US <xref ref-type="bibr" rid="bib1.bibx49" id="paren.58"/> and Continental China <xref ref-type="bibr" rid="bib1.bibx50" id="paren.59"/> models integrate ambient noise and teleseismic surface waves with receiver function data and apply a Bayesian Monte Carlo inversion to image crust and upper mantle structures to 150 km depth at 0.5° resolution. The US Upper Mantle model uses long-period Rayleigh wave ambient noise and Markov chain Monte Carlo inversion to map shear-wave velocities down to 300 km across the continental United States <xref ref-type="bibr" rid="bib1.bibx63" id="paren.60"/>. Similarly, the EUCrust model, based on four years of ambient noise data from 1293 broadband stations, resolves the European crust and uppermost mantle with high resolution using Bayesian nonlinear methods <xref ref-type="bibr" rid="bib1.bibx38" id="paren.61"/>. The Alaska model combines data from over 200 Transportable Array stations and integrates Rayleigh wave ellipticity, phase velocity, and receiver functions, using Markov chain inversion to image structures from the upper mantle to near-surface depths (140 km) <xref ref-type="bibr" rid="bib1.bibx3" id="paren.62"/>.</p>
      <p id="d2e3825">We also included several regional models from The Collaborative Seismic Earth Model Project (CSEM), constructed through full-waveform inversion <xref ref-type="bibr" rid="bib1.bibx15 bib1.bibx18 bib1.bibx19" id="paren.63"/>. These cover Europe <xref ref-type="bibr" rid="bib1.bibx11 bib1.bibx4" id="paren.64"/>, the Eastern and Western Mediterranean <xref ref-type="bibr" rid="bib1.bibx4 bib1.bibx14" id="paren.65"/>, the South and North Atlantic <xref ref-type="bibr" rid="bib1.bibx10 bib1.bibx46 bib1.bibx31" id="paren.66"/>, the Japanese Islands <xref ref-type="bibr" rid="bib1.bibx51" id="paren.67"/>, and Australasia <xref ref-type="bibr" rid="bib1.bibx16 bib1.bibx17" id="paren.68"/>. These models are characterized by high resolution and structural consistency and are widely used for deep Earth imaging and geodynamic research.</p>
      <p id="d2e3847">All collected 3-D velocity models underwent quality control, including duplicate removal, anomaly detection, gap interpolation, and format homogenization to ensure consistency for dataset construction. From the processed models, we extracted approximately 212 508 1-D velocity profiles, with detailed statistics for each data source provided in Table <xref ref-type="table" rid="T2"/>. A quantitative analysis of the diversity and similarity of the extracted 1-D velocity profiles is provided in Appendix <xref ref-type="sec" rid="App1.Ch1.S4"/>. To further increase diversity, each profile was augmented five times using a hierarchical strategy: the depth of the Moho discontinuity was first identified, and profiles were divided into crust and upper mantle sections. Each section was parameterized using cubic spline curves, with 3–6 control nodes for the crust and 6–12 nodes for the upper mantle, followed by random perturbations of the nodes to introduce structural variations. This augmentation preserved key geological features (e.g., the Moho interface) while significantly expanding the coverage and variability of the model library, ultimately yielding approximately 1.26 million augmented 1-D velocity models.</p>
      <p id="d2e3854">For each 1-D profile, we simulated fundamental-mode Rayleigh-wave dispersion curves over a 1–100 s period range, sampling 300 points using a combination of uniform, random, and logarithmic strategies (50, 30, and 20 points, respectively). Each dispersion curve, together with its associated velocity profile, constitutes a complete input–output pair for subsequent deep learning model training and validation. Figure <xref ref-type="fig" rid="F8"/> illustrates representative 1-D velocity profiles and their corresponding dispersion curves from the regional models, highlighting the relationships between velocity structures and surface-wave propagation under diverse geological conditions. These results provide high-quality initial data support for global geophysical imaging across different regions and scales.</p>

      <fig id="F8" specific-use="star"><label>Figure 8</label><caption><p id="d2e3861">Representative samples from the OpenSWI-deep dataset. The first and third rows show 1-D velocity profiles extracted from the 14 sub-datasets, where the mean velocity model is indicated by a solid black line. The second and fourth rows display the corresponding fundamental-mode Rayleigh-wave dispersion curves over a 1–100 s period range, with the mean phase and group velocities shown in pink and blue, respectively.</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f08.png"/>

        </fig>

</sec>
<sec id="Ch1.S2.SS4">
  <label>2.4</label><title>OpenSWI-real: AI-ready Real-world Dataset for Generalization Testing</title>
      <p id="d2e3878">In addition to the large-scale synthetic velocity profile–dispersion curve datasets designed for model training, we curated multiple AI-ready real-world dispersion curve datasets to assess the adaptability and generalization capability of deep learning methods under practical geophysical conditions. The first dataset is derived from the dispersion curve data processed by <xref ref-type="bibr" rid="bib1.bibx22" id="text.69"/> in the Long Beach region of the United States. As shown in Fig. <xref ref-type="fig" rid="F9"/>a, over 5200 short-period nodal stations were deployed between January and June 2011, primarily for oilfield surveys <xref ref-type="bibr" rid="bib1.bibx33" id="paren.70"/>, with an average station spacing of approximately 0.1 km. To achieve adequate spatial resolution, the dense array was divided into multiple subarrays, each with a 2 km radius. Dispersion curves were extracted automatically using a deep neural network after the frequency–Bessel (F–J) transform was applied to compute the frequency–phase velocity spectrum for each subarray. Figure <xref ref-type="fig" rid="F9"/>b shows representative observed dispersion curves from 9 stations (purple dashed lines), together with 1-D reference shear-wave velocity profiles (black solid lines) obtained via traditional inversion methods. This dataset contains only phase velocity data, without group velocity information. After standardized processing, it comprises observed dispersion data from 5297 stations (period range: 0.263–1.666 s) and corresponding reference velocity models (depth range: 0–1.4 km, interpolated at 40 m intervals).</p>

      <fig id="F9" specific-use="star"><label>Figure 9</label><caption><p id="d2e3893">Overview of the OpenSWI-real dataset. <bold>(a)</bold> Station deployment for the Long Beach dataset in Southern California. <bold>(b)</bold> Representative observed phase velocity dispersion curves (purple dashed lines) and reference velocity models (black lines) from traditional inversion. <bold>(c)</bold> Distribution of selected grid points in the CSRM dataset across continental China, with background color denoting velocity at 70 km depth. <bold>(d)</bold> Representative examples from the CSRM dataset showing observed group (blue) and phase (purple) velocity curves and corresponding reference 1-D velocity profiles (black).</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f09.jpg"/>

        </fig>

      <p id="d2e3914">The second dataset originates from the China Seismological Reference Model Project <xref ref-type="bibr" rid="bib1.bibx60 bib1.bibx62" id="paren.71"/>. <xref ref-type="bibr" rid="bib1.bibx62" id="text.72"/> collected continuous seismic records from multiple networks, including the China National Seismic Network (CNSN), the China Seismic Array (ChinArray), and the Public Data Management Center (PDMC), spanning 4196 seismic stations in total. Ambient noise cross-correlations between station pairs produced 639 171 empirical Green’s functions, from which dispersion curves were extracted using frequency–time analysis. Additionally, 54 792 event–station dispersion curves were retrieved from 226 regional seismic events recorded by 1463 stations. After gridding and quality control, the data were consolidated into 20 514 grid points and standardized to a period range of 8–70 s. To ensure reliability, we retained 12 901 grid points with at least 20 sampled period points. The resulting AI-ready dataset contains observed dispersion curves at these grid points (period range: 8–70 s) and their corresponding reference velocity models (depth range: 0–120 km, interpolated at 1 km intervals). Figure <xref ref-type="fig" rid="F9"/>c shows the spatial distribution of the selected grid points across continental China, with background colors indicating the reference model velocity at 70 km depth. Figure <xref ref-type="fig" rid="F9"/>d presents nine representative grid points, displaying observed dispersion curves (blue: group velocity; purple: phase velocity) and corresponding reference velocity profiles (black solid lines) derived from the traditional inversion results of <xref ref-type="bibr" rid="bib1.bibx62" id="text.73"/>.</p>
</sec>
</sec>
<sec id="Ch1.S3">
  <label>3</label><title>Deep-learning-based Framework for Surface-wave Inversion</title>
<sec id="Ch1.S3.SS1">
  <label>3.1</label><title>Transformer-based Architecture for Dispersion Curve Inversion</title>
      <p id="d2e3946">Deep learning-based surface-wave dispersion curve inversion seeks to learn a nonlinear mapping from input dispersion curves (including period, phase velocity, and group velocity) to corresponding 1-D subsurface shear-wave velocity profiles. In this study, we adopt a widely used Transformer-based architecture (Fig. <xref ref-type="fig" rid="F10"/>a) to enable end-to-end inversion <xref ref-type="bibr" rid="bib1.bibx37 bib1.bibx30" id="paren.74"/>. The input to the model is a <inline-formula><mml:math id="M226" display="inline"><mml:mrow><mml:mn mathvariant="normal">3</mml:mn><mml:mo>×</mml:mo><mml:mi>N</mml:mi></mml:mrow></mml:math></inline-formula> dispersion curve matrix, where the 3 rows represent period, phase velocity, and group velocity, and <inline-formula><mml:math id="M227" display="inline"><mml:mi>N</mml:mi></mml:math></inline-formula> denotes the number of sampled points. The model initially embeds the input via three separate <inline-formula><mml:math id="M228" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow></mml:math></inline-formula> convolutional neural network (CNN) layers, yielding a feature representation of size <inline-formula><mml:math id="M229" display="inline"><mml:mrow><mml:mn mathvariant="normal">3</mml:mn><mml:mo>×</mml:mo><mml:mi>N</mml:mi><mml:mo>×</mml:mo><mml:mi>E</mml:mi></mml:mrow></mml:math></inline-formula>, where <inline-formula><mml:math id="M230" display="inline"><mml:mi>E</mml:mi></mml:math></inline-formula> is the feature dimension. These embedded features are then processed through multiple Transformer blocks, which employ self-attention mechanisms to capture long-range dependencies across the dispersion curves. This global context modeling enhances the stability and accuracy of inversion results. 
Finally, a feature projection layer maps the global features extracted by the Transformer to a velocity profile of length <inline-formula><mml:math id="M231" display="inline"><mml:mi>M</mml:mi></mml:math></inline-formula>, where <inline-formula><mml:math id="M232" display="inline"><mml:mi>M</mml:mi></mml:math></inline-formula> corresponds to the number of target depth layers, producing the final inversion output.</p>

      <fig id="F10" specific-use="star"><label>Figure 10</label><caption><p id="d2e4025"><bold>(a)</bold> The architecture of the deep neural network (Transformer) used in this work for surface wave dispersion curve inversion. Training (blue) and validation (red) loss curves on the <bold>(b)</bold> OpenSWI-shallow and <bold>(c)</bold> OpenSWI-deep datasets. The learning rate curves are shown in the inset figures as purple lines.</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f10.png"/>

        </fig>

      <p id="d2e4042">Given that the period range and target depth in real observational data vary, and that the maximum inversion depth strongly correlates with the observed period range, we incorporate the depth-aware strategy proposed by <xref ref-type="bibr" rid="bib1.bibx37" id="text.75"/> during training. This approach dynamically computes the maximum wavelength (period multiplied by velocity) for each input and adaptively determines the effective output depth range, thereby suppressing predictions at irrelevant depths and improving inversion accuracy. For the loss function, we adopt the Mean Squared Error (MSE), calculated exclusively over the effective depth range between predicted and ground-truth velocity profiles. To enhance robustness against noise and missing data commonly encountered in practice, we simulate these effects during data loading by adding 3 % random Gaussian noise and randomly masking 10 % of the dispersion data points.</p>
      <p id="d2e4049">Regarding training configuration, we use a larger batch size of 2048 and limit training to 100 epochs for the shallow dispersion dataset (OpenSWI-shallow) to optimize large-scale training efficiency. For the deeper dataset (OpenSWI-deep), a smaller batch size of 512 and up to 1000 epochs are employed. To avoid overfitting and reduce unnecessary computation, we adopt an early stopping strategy, terminating training when the validation loss does not improve for 30 consecutive epochs for OpenSWI-shallow and 50 epochs for OpenSWI-deep. Both datasets are trained using the Adam optimizer, combined with a learning rate scheduler that integrates warm-up and step decay strategies to enhance training stability. During warm-up, the learning rate increases linearly from <inline-formula><mml:math id="M233" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">9</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> to <inline-formula><mml:math id="M234" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula> over approximately 2 epochs for OpenSWI-shallow and 10 epochs for OpenSWI-deep. In the subsequent step decay phase, the learning rate is reduced to 75 % of its value every 40 epochs for OpenSWI-shallow and every 500 epochs for OpenSWI-deep. Figure <xref ref-type="fig" rid="F10"/>b and c present the training and validation error curves for both datasets alongside their corresponding learning rate schedules.</p>
      <p id="d2e4090">Model performance is first evaluated on the test sets by comparing predicted and ground-truth velocity profiles using Root Mean Squared Error (RMSE). Beyond this quantitative validation, the trained models are applied to real observational data to assess their generalization capabilities. Because ground-truth velocity profiles are not available for real data, the predictions cannot be scored directly against the true structure; instead, we compare dispersion curves synthesized from the predicted velocity profiles with the observed dispersion curves to compute the misfit errors, and assess the inversion quality by analyzing the distribution of these errors, including their mean and variance.</p>

      <fig id="F11" specific-use="star"><label>Figure 11</label><caption><p id="d2e4095">Representative inversion results on the test subsets of <bold>(a)</bold> OpenSWI-shallow and <bold>(b)</bold> OpenSWI-deep. The black lines represent the ground-truth velocity profiles, while the red lines denote the predicted results obtained by the trained neural network.</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f11.png"/>

        </fig>

</sec>
<sec id="Ch1.S3.SS2">
  <label>3.2</label><title>Large-scale Training with the OpenSWI-shallow and OpenSWI-deep Datasets</title>
      <p id="d2e4118">To comprehensively assess the effectiveness of the proposed deep neural network model for surface wave dispersion curve inversion, we conducted systematic training on both the OpenSWI-shallow and OpenSWI-deep datasets. Detailed architectural hyperparameters are provided in Appendix <xref ref-type="sec" rid="App1.Ch1.S5"/>. To ensure balanced representation across the training, validation, and test subsets, we employed stratified sampling strategies. Specifically, for the OpenSWI-shallow dataset, stratification was based on geological structure types (Flat, Flat-Fault, Fold, Fold-Fault, and Field), using a 90 %/5 %/5 % split. For the OpenSWI-deep dataset, stratification was performed by geographic regions of the source models, following the same partitioning ratio. Furthermore, to assess whether the proposed Transformer-based architecture provides advantages over more conventional neural network designs, we conducted additional benchmarking experiments using alternative deep learning models, including Unet- <xref ref-type="bibr" rid="bib1.bibx57" id="paren.76"/> and FCNN-based <xref ref-type="bibr" rid="bib1.bibx8" id="paren.77"/> architectures. These models were trained and evaluated under the same experimental settings on the OpenSWI-shallow and OpenSWI-deep datasets. The detailed network configurations and benchmarking results are provided in Appendix <xref ref-type="sec" rid="App1.Ch1.S6"/>.</p>
      <p id="d2e4131">During training, both training and validation errors were continuously monitored, as illustrated in Fig. <xref ref-type="fig" rid="F10"/>b and c. For both datasets, the error curves demonstrate stable convergence, suggesting that the model effectively captures the nonlinear relationship between surface wave dispersion curves and subsurface shear-wave velocity profiles. After training, evaluation on the held-out test sets yielded RMSE values of 0.1467 km s<sup>−1</sup>  for OpenSWI-shallow and 0.048 km s<sup>−1</sup> for OpenSWI-deep, indicating that the predicted velocity models closely match the ground-truth profiles and confirming the model’s high inversion accuracy under varying geological conditions.</p>
      <p id="d2e4160">Representative inversion results from both datasets are shown in Fig. <xref ref-type="fig" rid="F11"/>a and b, further demonstrating the model’s capability to reconstruct subsurface velocity structures with high fidelity, including at greater depths. These results validate the generalization ability and practical applicability of the proposed method across diverse geological settings. It is worth noting that OpenSWI-shallow includes significantly more samples than OpenSWI-deep and encompasses a wider variety of geologically diverse and structurally complex velocity models. Consequently, achieving optimal performance on this dataset demands more specialized architectural designs and training strategies. While the model maintains strong overall inversion quality, it tends to oversmooth regions characterized by strong heterogeneity or abrupt structural changes, resulting in slightly muted responses in complex geological zones. This smoothing effect highlights current limitations in resolving fine-scale structural features and underscores the need for future enhancements, such as structure-aware regularization or multi-scale modeling techniques, to improve the representation of intricate subsurface variations.</p>
</sec>
<sec id="Ch1.S3.SS3">
  <label>3.3</label><title>Generalization Testing on Real-world Observations Using OpenSWI-real</title>
      <p id="d2e4173">To evaluate the generalization capability of deep neural networks trained entirely on synthetic data, we directly applied the pretrained models to the OpenSWI-real dataset, which includes two representative real-world regions: Long Beach (shallow) and CSRM (deep). In the shallow case, we used phase velocity dispersion curves from 5297 stations in the Long Beach area as input to the shallow inversion network. The model generated a 1-D S-wave velocity profile for each station, and these profiles were then assembled into a 3-D velocity model of the region. Figure <xref ref-type="fig" rid="F12"/>a presents horizontal slices of the predicted model at depths of 100, 200, 400, and 600 m, alongside the corresponding reference model. Figure <xref ref-type="fig" rid="F12"/>b compares selected 1-D profiles from both models. Notably, despite the complete absence of Long Beach data during training, the model successfully reconstructs key subsurface velocity structures. In particular, the predicted profiles at 100 and 200 m show excellent agreement with the reference model. Figure <xref ref-type="fig" rid="F12"/>c shows the observed dispersion curves (black), as well as synthetic curves generated from the reference model (blue) and the neural network predictions (red). To quantitatively evaluate inversion performance, we computed the misfit between observed dispersion curves and those derived from the predicted velocity profiles. Figure <xref ref-type="fig" rid="F12"/>d summarizes the error distributions. For the reference model, the mean and variance of the misfit are <inline-formula><mml:math id="M237" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>33.9 m s<sup>−1</sup> and 14.7 m<sup>2</sup> s<sup>−2</sup>, respectively, whereas the neural network predictions yield a mean misfit of 1.8 m s<sup>−1</sup> and a variance of 18.1 m<sup>2</sup> s<sup>−2</sup>. 
These results demonstrate that the pretrained model generalizes effectively to real observational data and, in many cases, even outperforms the reference model, particularly in shallow geological settings.</p>

      <fig id="F12" specific-use="star"><label>Figure 12</label><caption><p id="d2e4242">Generalization performance on real-world Long Beach data from the OpenSWI-real dataset. <bold>(a)</bold> Reference <xref ref-type="bibr" rid="bib1.bibx22" id="paren.78"/> and predicted <inline-formula><mml:math id="M242" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> slices at depths of 100, 200, 300, and 600 m. <bold>(b)</bold> 1-D <inline-formula><mml:math id="M243" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> profiles at nine representative locations, with reference and predicted models shown in blue and red, respectively. <bold>(c)</bold> Comparison of phase velocity dispersion curves, including observed curves (black), synthetic curves from the reference model (blue) and the predicted model (red). <bold>(d)</bold> Error distributions of phase velocity with respect to observed curves, based on synthetic dispersion curves from the reference (blue) and predicted (purple) models.</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f12.jpg"/>

        </fig>

      <p id="d2e4289">For the deep case, we applied the pretrained deep inversion network to both phase and group velocity dispersion curves at 12 901 grid points provided by the CSRM project <xref ref-type="bibr" rid="bib1.bibx60 bib1.bibx62" id="paren.79"/>. Figure <xref ref-type="fig" rid="F13"/>a compares the predicted and reference velocity structures at depths of 20, 40, 60, and 80 km. Figure <xref ref-type="fig" rid="F13"/>b shows 1-D profile comparisons at nine representative grid points, where black lines denote the reference models and red lines indicate the neural network predictions. Figure <xref ref-type="fig" rid="F13"/>c presents the observed dispersion curves (black), along with synthetic curves generated from the reference model (blue) and the predicted models (red). Figure <xref ref-type="fig" rid="F13"/>d displays the distribution of misfits between synthetic and observed dispersion curves across all grid points. The reference model achieves a mean misfit of <inline-formula><mml:math id="M244" display="inline"><mml:mo>-</mml:mo></mml:math></inline-formula>72.9 m s<sup>−1</sup> with a variance of 65.5 m<sup>2</sup> s<sup>−2</sup>, while the neural network results exhibit a mean misfit of 24.8 m s<sup>−1</sup> and a lower variance of 49.6 m<sup>2</sup> s<sup>−2</sup>. These findings suggest that the trained network can recover deep crustal velocity structures with accuracy comparable to, or better than, that of the reference model – even without any fine-tuning on real data.</p>

      <fig id="F13" specific-use="star"><label>Figure 13</label><caption><p id="d2e4362">Generalization performance on real-world CSRM data from the OpenSWI-real dataset. <bold>(a)</bold> Reference <xref ref-type="bibr" rid="bib1.bibx62" id="paren.80"/> and predicted <inline-formula><mml:math id="M249" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> slices at depths of 20, 40, 60, and 80 km. <bold>(b)</bold> 1-D <inline-formula><mml:math id="M250" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> profiles at nine representative locations, with reference and predicted models shown in blue and red, respectively. <bold>(c)</bold> Comparison of phase and group velocity dispersion curves, including observed curves (black), synthetic curves from the reference model (blue for group velocity, light blue for phase velocity), and from the predicted model (red for group velocity, pink for phase velocity). <bold>(d)</bold> Error distributions of phase velocity (left), group velocity (middle), and their sum (right) with respect to observed curves, based on synthetic dispersion curves from the reference (blue) and predicted (purple) models.</p></caption>
          <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f13.jpg"/>

        </fig>

      <p id="d2e4409">In summary, these experiments confirm the strong generalization ability of the proposed method across a broad range of geological settings and depth regimes. More importantly, they highlight the effectiveness of the OpenSWI dataset series in enabling the training and evaluation of deep learning-based inversion techniques. With its extensive geological diversity, structural complexity, and broad spatial coverage, the OpenSWI dataset provides a solid foundation for learning transferable representations. As demonstrated, the resulting models can produce high-quality inversion results on real-world observations without retraining or domain adaptation, positioning OpenSWI as a valuable benchmark for advancing deep learning in realistic geophysical applications.</p>
</sec>
</sec>
<sec id="Ch1.S4">
  <label>4</label><title>Discussion</title>
      <p id="d2e4421">The OpenSWI dataset marks a substantial advancement in the development of AI-ready benchmark datasets for surface wave dispersion curve inversion. Compared to existing public datasets, OpenSWI offers significantly larger scale, broader spatial coverage, and enhanced geological diversity. Specifically, the OpenSWI-shallow subset contains over 22 million 1-D velocity profiles and their associated dispersion curves representing shallow subsurface structures (depths <inline-formula><mml:math id="M251" display="inline"><mml:mo>&lt;</mml:mo></mml:math></inline-formula> 3 km), while the OpenSWI-deep subset comprises approximately 1.28 million samples covering deeper Earth structures down to 300 km. In addition, the OpenSWI-real dataset provides real-world observational data for validating inversion methods under practical conditions. This comprehensive suite enables robust evaluation of machine learning–based approaches across synthetic and real data scenarios. Furthermore, a complete dataset construction toolkit, SWIDP, is released alongside the dataset, allowing users to flexibly generate customized datasets tailored to specific research needs.</p>
      <p id="d2e4431">Experimental results show that deep learning models trained exclusively on synthetic data from OpenSWI exhibit strong generalization to real-world observations, even without fine-tuning. This underscores the importance of large-scale, high-fidelity synthetic datasets in overcoming the challenges posed by the limited availability and annotation complexity of real seismic data. Practically, this indicates that reliable inversion results can be obtained even in regions with sparse or low-quality observations, thereby lowering the threshold for deploying machine learning models in real-world geophysical applications.</p>
      <p id="d2e4434">Despite these strengths, several limitations remain. First, the derivation of <inline-formula><mml:math id="M252" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">p</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> and <inline-formula><mml:math id="M253" display="inline"><mml:mi mathvariant="italic">ρ</mml:mi></mml:math></inline-formula> from <inline-formula><mml:math id="M254" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> through empirical relationships may introduce systematic biases, especially in regions with complex or atypical geological structures <xref ref-type="bibr" rid="bib1.bibx5" id="paren.81"/>. Second, although OpenSWI spans a wide array of tectonic and geological environments, it still underrepresents certain extreme (e.g. anisotropic media, fluid-saturated layers) or geodynamically active (e.g. mid-ocean ridges, highly deformed orogenic belts) settings, limiting its applicability in those areas. Third, the current dataset focuses primarily on fundamental-mode Rayleigh wave dispersion curves and does not incorporate higher modes or additional geophysical observables (e.g., ellipticity, receiver functions), which constrains its utility for joint inversion frameworks <xref ref-type="bibr" rid="bib1.bibx36 bib1.bibx30" id="paren.82"/>. Lastly, although OpenSWI incorporates a degree of noise and data incompleteness, it does not fully capture the complexities of real-world measurements, including uncertainties in source characteristics, instrument responses, and acquisition-related biases.</p>
      <p id="d2e4472">Future developments can be pursued along several interrelated directions. First, expanding the dataset’s geographic coverage and geological diversity, particularly in tectonically extreme regions, would broaden its applicability. In particular, large-scale synthetic datasets incorporating heterogeneous three-dimensional geological structures, such as the HEMEWS-3D database <xref ref-type="bibr" rid="bib1.bibx32" id="paren.83"/>, provide valuable resources for constructing more complex training and benchmarking scenarios. Second, integrating data across different modes, period ranges, and geological settings could enable more robust inversion approaches and improve transferability across regions. Third, incorporating additional real observational data to construct datasets suitable for hybrid or transfer learning would further enhance model generalization in field applications. Finally, including higher-mode dispersion curves and complementary geophysical observables would support more comprehensive multi-modal and multi-physics inversion strategies. We envision OpenSWI as a long-term, evolving community resource that will continue to drive data-driven advances in surface wave inversion and geophysical imaging.</p>
</sec>
<sec id="Ch1.S5">
  <label>5</label><title>Code and data availability</title>
      <p id="d2e4487">All codes, datasets, and experimental results in this study are publicly available to ensure reproducibility, validation, and further development. The Python toolkit <monospace>SWIDP</monospace>, available at <ext-link xlink:href="https://doi.org/10.5281/zenodo.16884901" ext-link-type="DOI">10.5281/zenodo.16884901</ext-link> <xref ref-type="bibr" rid="bib1.bibx35" id="paren.84"/> and <uri>https://github.com/liufeng2317/OpenSWI</uri> (last access: 18 April 2026), provides modules for 1-D velocity profile extraction and augmentation, layer parameter conversion, dispersion curve computation, and 2-D velocity model augmentation using diffusion models. The <monospace>OpenSWI</monospace> dataset, comprising <monospace>OpenSWI-shallow</monospace>, <monospace>OpenSWI-deep</monospace>, and <monospace>OpenSWI-real</monospace>, is released in a unified format with complete metadata, accessible via <ext-link xlink:href="https://doi.org/10.5281/zenodo.16874111" ext-link-type="DOI">10.5281/zenodo.16874111</ext-link> <xref ref-type="bibr" rid="bib1.bibx34" id="paren.85"/> and <uri>https://huggingface.co/datasets/LiuFeng2317/OpenSWI</uri> (last access: 18 April 2026). Deep learning training codes, pretrained model weights, and experimental results are also openly shared to support future research and applications.</p>
</sec>
<sec id="Ch1.S6" sec-type="conclusions">
  <label>6</label><title>Conclusions</title>
      <p id="d2e4532">In this study, we present OpenSWI, the first AI-ready benchmark dataset at the tens-of-millions scale specifically designed for surface wave dispersion curve inversion, along with a complete data generation toolkit, SWIDP. The dataset encompasses both shallow and deep subsurface velocity structures across a wide range of geological settings. Its large scale, geological diversity, and standardized formats for velocity profiles and dispersion curves provide a robust foundation for evaluating machine learning–based inversion methods. Experimental results show that models trained entirely on synthetic data from OpenSWI can generalize effectively to real-world observations, highlighting the dataset's practical value in improving the robustness and applicability of data-driven inversion approaches. Future developments will focus on expanding the dataset's geographic and geological coverage, incorporating additional geophysical observables to support more complex joint inversion tasks, and exploring deeper integration with real observational data. We expect OpenSWI to serve as an open, continuously evolving community resource that promotes reproducible research and supports the broader application of machine learning methods in geophysical imaging.</p>
</sec>

      
      </body>
    <back><app-group>

<app id="App1.Ch1.S1">
  <label>Appendix A</label><title>Illustrative Code Examples for the OpenSWI-shallow Generation Workflow with SWIDP</title><fig id="LiA1"><label>Listing A1</label><caption><p id="d2e4550">Example code snippets for the OpenSWI-shallow generation workflow using SWIDP.</p></caption>
        
        <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-l01.png"/>

      </fig>


</app>

<app id="App1.Ch1.S2">
  <label>Appendix B</label><title>Illustrative Code Examples for the OpenSWI-deep Generation Workflow with SWIDP</title>

      <fig id="LiB1"><label>Listing B1</label><caption><p id="d2e4573">Example code snippets for the OpenSWI-deep generation workflow using SWIDP.</p></caption>
        
        <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-l02.png"/>

      </fig>

</app>

<app id="App1.Ch1.S3">
  <label>Appendix C</label><title>Diffusion Probabilistic Models for Continually Augmenting the OpenSWI-shallow Subsets</title>
<sec id="App1.Ch1.S3.SS1">
  <label>C1</label><title>Introduction to Denoising Diffusion Probabilistic Models (DDPMs)</title>
      <p id="d2e4599">Denoising Diffusion Probabilistic Models (DDPMs) are a class of powerful generative models that progressively refine noisy data to generate realistic outputs <xref ref-type="bibr" rid="bib1.bibx27 bib1.bibx53" id="paren.86"/>. The core principle of DDPMs involves a two-step diffusion process: a forward process in which noise is progressively added to the data, and a reverse process in which the model learns to remove the noise and recover the original data distribution. In this study, DDPMs are applied to model and augment geological structures within the OpenFWI dataset <xref ref-type="bibr" rid="bib1.bibx12" id="paren.87"/>, which consists of five subsets: FlatVel-A, FlatFault-A, CurveVel-A, CurveFault-A, and Style-A. These subsets represent various subsurface geophysical features, and by learning their distribution characteristics, DDPMs are capable of generating new, physically plausible velocity models that exhibit complex geological features such as faults, folds, and field-style structures.</p>
</sec>
<sec id="App1.Ch1.S3.SS2">
  <label>C2</label><title>Core Principle of DDPM</title>
      <p id="d2e4620">DDPMs are based on two main processes: <list list-type="bullet"><list-item>
      <p id="d2e4625"><italic>Forward diffusion</italic>. Starting from an input data point, Gaussian noise is progressively added in multiple steps, transforming the data into pure noise.</p></list-item><list-item>
      <p id="d2e4631"><italic>Reverse diffusion</italic>. The model learns to reverse this process, starting from random noise and progressively denoising it to recover the underlying data distribution.</p></list-item></list></p>
      <p id="d2e4636">The reverse denoising process is learned by training a neural network to predict the noise added at each diffusion step. The objective is to minimize the difference between the predicted noise and the actual noise, enabling the model to generate realistic data that follows the original distribution. Formally, the training loss function is defined as:

            <disp-formula id="App1.Ch1.S3.E3" content-type="numbered"><label>C1</label><mml:math id="M255" display="block"><mml:mrow><mml:mi>L</mml:mi><mml:mo>(</mml:mo><mml:mi mathvariant="italic">θ</mml:mi><mml:mo>)</mml:mo><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="double-struck">E</mml:mi><mml:mrow><mml:mi>q</mml:mi><mml:mo>(</mml:mo><mml:msub><mml:mi mathvariant="bold">x</mml:mi><mml:mn mathvariant="normal">0</mml:mn></mml:msub><mml:mo>)</mml:mo></mml:mrow></mml:msub><mml:mfenced close="]" open="["><mml:mrow><mml:munderover><mml:mo movablelimits="false">∑</mml:mo><mml:mrow><mml:mi>t</mml:mi><mml:mo>=</mml:mo><mml:mn mathvariant="normal">1</mml:mn></mml:mrow><mml:mi>T</mml:mi></mml:munderover><mml:mo>‖</mml:mo><mml:msub><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mi mathvariant="italic">θ</mml:mi></mml:msub><mml:mo>(</mml:mo><mml:msub><mml:mi mathvariant="bold">x</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>,</mml:mo><mml:mi>t</mml:mi><mml:mo>)</mml:mo><mml:mo>-</mml:mo><mml:msub><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:msup><mml:mo>‖</mml:mo><mml:mn mathvariant="normal">2</mml:mn></mml:msup></mml:mrow></mml:mfenced></mml:mrow></mml:math></disp-formula>

          where <inline-formula><mml:math id="M256" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mi mathvariant="italic">θ</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the predicted noise at step <inline-formula><mml:math id="M257" display="inline"><mml:mi>t</mml:mi></mml:math></inline-formula>, and <inline-formula><mml:math id="M258" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">ϵ</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> is the actual noise added during the forward diffusion process.</p>
      <p id="d2e4745">For further details on the DDPM methodology, please refer to <xref ref-type="bibr" rid="bib1.bibx27" id="text.88"/>. Additionally, an implementation of DDPM in PyTorch is available at <uri>https://github.com/lucidrains/denoising-diffusion-pytorch</uri> (last access: 18 April 2026).</p>
</sec>
<sec id="App1.Ch1.S3.SS3">
  <label>C3</label><title>Model Architecture and Training Configuration</title>
      <p id="d2e4762">The DDPM model used in this study follows a U-Net architecture with the following key components: <list list-type="bullet"><list-item>
      <p id="d2e4767"><italic>U-Net architecture</italic>. A convolutional neural network with an encoder-decoder structure. The encoder reduces the spatial resolution, and the decoder restores it to the original resolution (<inline-formula><mml:math id="M259" display="inline"><mml:mrow><mml:mn mathvariant="normal">64</mml:mn><mml:mo>×</mml:mo><mml:mn mathvariant="normal">64</mml:mn></mml:mrow></mml:math></inline-formula>). The architecture includes residual blocks and batch normalization.</p></list-item><list-item>
      <p id="d2e4785"><italic>Noise schedule</italic>. A linear noise schedule is applied during the forward diffusion process, where the variance of the Gaussian noise increases progressively with each step (total 1000 steps).</p></list-item><list-item>
      <p id="d2e4791"><italic>Optimizer</italic>. Adam optimizer with a learning rate of <inline-formula><mml:math id="M260" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">6</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, <inline-formula><mml:math id="M261" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">β</mml:mi><mml:mn mathvariant="normal">1</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.9</mml:mn></mml:mrow></mml:math></inline-formula>, and <inline-formula><mml:math id="M262" display="inline"><mml:mrow><mml:msub><mml:mi mathvariant="italic">β</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub><mml:mo>=</mml:mo><mml:mn mathvariant="normal">0.999</mml:mn></mml:mrow></mml:math></inline-formula>.</p></list-item><list-item>
      <p id="d2e4845"><italic>Training duration</italic>. The model was trained for 5000 epochs with a batch size of 256.</p></list-item></list></p>
      <p id="d2e4850">The training objective is to minimize the difference between the predicted and actual noise added during the forward diffusion process, as described by the loss function in the previous section.</p>
</sec>
<sec id="App1.Ch1.S3.SS4">
  <label>C4</label><title>DDPM Sampling and OpenSWI-shallow Dataset Generation</title>
      <p id="d2e4861">After training, the DDPM model is used for continuous data augmentation by generating new velocity models. This process involves sampling Gaussian noise and running the reverse diffusion process to produce realistic velocity models. The generated models reflect a variety of subsurface features, such as faults and complex sedimentary structures, ensuring physical plausibility. In practice, generating one 2-D velocity model requires approximately 0.35 s on a single Ascend 910B2 NPU, making it feasible to rapidly expand the dataset when needed.</p>
      <p id="d2e4864">To facilitate the integration of the DDPM-generated models into the OpenSWI-shallow dataset, we provide a set of tools in the SWIDP pipeline. These tools enable the extraction and conversion of the DDPM sampling results into 1-D velocity models, as required by OpenSWI-shallow. The process includes the following key steps: <list list-type="bullet"><list-item>
      <p id="d2e4869"><italic>DDPM sampling</italic>. The DDPM model generates new velocity models by progressively denoising random Gaussian noise.</p></list-item><list-item>
      <p id="d2e4875"><italic>Denormalization</italic>. The generated models, initially in normalized form, are denormalized to match the required velocity range.</p></list-item><list-item>
      <p id="d2e4881"><italic>Profile extraction and rationalization</italic>. The velocity models are then extracted into 1-D velocity profiles and rationalized to ensure geological consistency.</p></list-item><list-item>
      <p id="d2e4887"><italic>Dispersion curve calculation</italic>. The rationalized 1-D velocity profiles are used to calculate the corresponding dispersion curves, which are essential for surface wave inversion tasks.</p></list-item></list></p>
      <p id="d2e4892">By continually generating new data and performing the above operations, the OpenSWI-shallow dataset is augmented with a diverse set of realistic velocity profiles, further expanding the dataset's coverage and variability for improved inversion model robustness.</p>
</sec>
</app>

<app id="App1.Ch1.S4">
  <label>Appendix D</label><title>Statistical Analysis of the Diversity of Extracted 1-D Velocity Models</title>
      <p id="d2e4904">To evaluate the structural diversity of the extracted 1-D velocity models and to assess potential similarity introduced by sampling multiple profiles from the same 3-D geological models, we conducted several statistical analyses on the velocity structures.</p>

      <fig id="LiD1" specific-use="star"><label>Figure D1</label><caption><p id="d2e4909">Statistical analysis of the structural diversity of the extracted 1-D velocity models. <bold>(a)</bold> Distribution of <inline-formula><mml:math id="M263" display="inline"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> distances between randomly sampled pairs of shear-wave velocity profiles (<inline-formula><mml:math id="M264" display="inline"><mml:mrow><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">5</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> pairs), showing a broad range of structural differences among the extracted models. <bold>(b)</bold> PCA projection of the velocity profiles in a two-dimensional feature space. Colors denote profiles derived from different source 3-D geological models, illustrating both the separation between regional structural patterns and the variability introduced by the perturbation strategy within each model group.</p></caption>
        <graphic xlink:href="https://essd.copernicus.org/articles/18/2769/2026/essd-18-2769-2026-f14.jpg"/>

      </fig>

      <p id="d2e4946">First, we evaluated the similarity between randomly sampled pairs of velocity profiles. A total of <inline-formula><mml:math id="M265" display="inline"><mml:mrow><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mn mathvariant="normal">5</mml:mn></mml:msup></mml:mrow></mml:math></inline-formula> profile pairs were randomly selected from the extracted model library, and their differences were quantified using the <inline-formula><mml:math id="M266" display="inline"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> distance between shear-wave velocity vectors. Each profile was represented by a depth-sampled <inline-formula><mml:math id="M267" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula> vector with consistent sampling intervals. The resulting distribution of <inline-formula><mml:math id="M268" display="inline"><mml:mrow><mml:msub><mml:mi>L</mml:mi><mml:mn mathvariant="normal">2</mml:mn></mml:msub></mml:mrow></mml:math></inline-formula> distances (Fig. <xref ref-type="fig" rid="LiD1"/>a) spans a broad range, indicating substantial structural variability among the velocity profiles despite being derived from a limited number of underlying 3-D models. This variability arises from both regional structural differences among the source geological models and the perturbation strategy applied during dataset augmentation.</p>
      <p id="d2e4996">Second, we performed a dimensionality reduction analysis using Principal Component Analysis (PCA) to visualize the global distribution of velocity structures. Each 1-D velocity profile was represented as a vector of shear-wave velocities sampled along depth and then projected into a two-dimensional principal component space. The PCA distributions for velocity profiles derived from different source models are shown in Fig. <xref ref-type="fig" rid="LiD1"/>b. The PCA projections demonstrate that profiles originating from different regional models occupy distinct regions in the reduced feature space, reflecting systematic variations in crustal and upper mantle structures across different tectonic settings. Meanwhile, profiles extracted from the same regional model still exhibit a relatively broad spread in the PCA space, indicating that the perturbation strategy introduces additional structural variability while preserving the large-scale geological characteristics of the original models.</p>
      <p id="d2e5001">Overall, these statistical analyses suggest that the extracted and augmented 1-D velocity models cover a wide range of structurally diverse velocity profiles while maintaining geologically realistic constraints inherited from the underlying 3-D models. This balance between geological realism and structural variability is essential for constructing a robust benchmark dataset for surface-wave dispersion inversion.</p>
</app>

<app id="App1.Ch1.S5">
  <label>Appendix E</label><title>Transformer-based Network Architecture for Different Datasets</title>

<table-wrap id="TE1"><label>Table E1</label><caption><p id="d2e5016">Transformer-based Network Architecture for Different Datasets.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="6">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1">Dataset</oasis:entry>
         <oasis:entry colname="col2">Input</oasis:entry>
         <oasis:entry colname="col3">Embedding</oasis:entry>
         <oasis:entry colname="col4">Transformer</oasis:entry>
         <oasis:entry colname="col5">Attention</oasis:entry>
         <oasis:entry colname="col6">Output</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">Shape</oasis:entry>
         <oasis:entry colname="col3">Dim.</oasis:entry>
         <oasis:entry colname="col4">Blocks</oasis:entry>
         <oasis:entry colname="col5">Heads</oasis:entry>
         <oasis:entry colname="col6">Shape</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-shallow</oasis:entry>
         <oasis:entry colname="col2">3 <inline-formula><mml:math id="M270" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 100</oasis:entry>
         <oasis:entry colname="col3">64</oasis:entry>
         <oasis:entry colname="col4">3</oasis:entry>
         <oasis:entry colname="col5">8</oasis:entry>
         <oasis:entry colname="col6">1 <inline-formula><mml:math id="M271" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 70</oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-deep</oasis:entry>
         <oasis:entry colname="col2">3 <inline-formula><mml:math id="M272" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
         <oasis:entry colname="col3">128</oasis:entry>
         <oasis:entry colname="col4">3</oasis:entry>
         <oasis:entry colname="col5">8</oasis:entry>
         <oasis:entry colname="col6">1 <inline-formula><mml:math id="M273" display="inline"><mml:mo>×</mml:mo></mml:math></inline-formula> 300</oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table><table-wrap-foot><p id="d2e5019">Note: The input shape consists of three features: period, phase velocity, and group velocity. The output shape corresponds to the shear-wave velocity (<inline-formula><mml:math id="M269" display="inline"><mml:mrow><mml:msub><mml:mi>v</mml:mi><mml:mi mathvariant="normal">s</mml:mi></mml:msub></mml:mrow></mml:math></inline-formula>).</p></table-wrap-foot></table-wrap>

</app>

<app id="App1.Ch1.S6">
  <label>Appendix F</label><title>Benchmarking Alternative Neural Network Architectures</title>
<sec id="App1.Ch1.S6.SS1">
  <label>F1</label><title>Compared Architectures</title>
      <p id="d2e5188">To assess the effectiveness of the proposed approach, three representative neural network architectures previously applied to surface-wave dispersion curve inversion are considered: a U-Net-based model <xref ref-type="bibr" rid="bib1.bibx57" id="paren.89"/>, a fully connected neural network (FCNN) <xref ref-type="bibr" rid="bib1.bibx8" id="paren.90"/>, and the Transformer-based architecture adopted in this study. The U-Net architecture is a convolutional encoder–decoder network originally developed for image segmentation and subsequently adapted to geophysical inversion problems. In this benchmark, a one-dimensional U-Net implementation following the design proposed by <xref ref-type="bibr" rid="bib1.bibx57" id="text.91"/> is adopted. The model consists of four encoder–decoder stages with skip connections, where convolutional layers progressively extract hierarchical features from the dispersion curves and reconstruct the corresponding subsurface shear-wave velocity profiles. The FCNN model follows the architecture described by <xref ref-type="bibr" rid="bib1.bibx8" id="text.92"/>. It consists of an initial convolutional layer serving as a feature embedding module, followed by seven fully connected layers that map dispersion-curve features directly to the target shear-wave velocity profile. Detailed architectural configurations of the U-Net and FCNN models are available in the corresponding references. In the present benchmark, both models are implemented following the configurations described in the original studies to maintain consistency with previous work.</p>

<table-wrap id="TF1" specific-use="star"><label>Table F1</label><caption><p id="d2e5211">Benchmark comparison of different neural network architectures and loss functions on the OpenSWI datasets. Values represent RMSE (km s<sup>−1</sup>) computed on the held-out test subsets. The best performance in each row is highlighted in bold.</p></caption><oasis:table frame="topbot"><oasis:tgroup cols="7">
     <oasis:colspec colnum="1" colname="col1" align="left"/>
     <oasis:colspec colnum="2" colname="col2" align="right"/>
     <oasis:colspec colnum="3" colname="col3" align="right"/>
     <oasis:colspec colnum="4" colname="col4" align="right"/>
     <oasis:colspec colnum="5" colname="col5" align="right"/>
     <oasis:colspec colnum="6" colname="col6" align="right"/>
     <oasis:colspec colnum="7" colname="col7" align="right"/>
     <oasis:thead>
       <oasis:row>
         <oasis:entry colname="col1">Dataset</oasis:entry>
         <oasis:entry colname="col2">U-Net</oasis:entry>
         <oasis:entry colname="col3">U-Net</oasis:entry>
         <oasis:entry colname="col4">FCNN</oasis:entry>
         <oasis:entry colname="col5">FCNN</oasis:entry>
         <oasis:entry colname="col6">Transformer</oasis:entry>
         <oasis:entry colname="col7">Transformer</oasis:entry>
       </oasis:row>
       <oasis:row rowsep="1">
         <oasis:entry colname="col1"/>
         <oasis:entry colname="col2">(MAE)</oasis:entry>
         <oasis:entry colname="col3">(MSE)</oasis:entry>
         <oasis:entry colname="col4">(MAE)</oasis:entry>
         <oasis:entry colname="col5">(MSE)</oasis:entry>
         <oasis:entry colname="col6">(MAE)</oasis:entry>
         <oasis:entry colname="col7">(MSE)</oasis:entry>
       </oasis:row>
     </oasis:thead>
     <oasis:tbody>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-shallow</oasis:entry>
         <oasis:entry colname="col2">0.1825</oasis:entry>
         <oasis:entry colname="col3">0.1811</oasis:entry>
         <oasis:entry colname="col4">0.2199</oasis:entry>
         <oasis:entry colname="col5">0.2169</oasis:entry>
         <oasis:entry colname="col6">0.1124</oasis:entry>
         <oasis:entry colname="col7"><bold>0.1047</bold></oasis:entry>
       </oasis:row>
       <oasis:row>
         <oasis:entry colname="col1">OpenSWI-deep</oasis:entry>
         <oasis:entry colname="col2">0.0454</oasis:entry>
         <oasis:entry colname="col3">0.0421</oasis:entry>
         <oasis:entry colname="col4">0.0617</oasis:entry>
         <oasis:entry colname="col5">0.0554</oasis:entry>
         <oasis:entry colname="col6">0.0164</oasis:entry>
         <oasis:entry colname="col7"><bold>0.0163</bold></oasis:entry>
       </oasis:row>
     </oasis:tbody>
   </oasis:tgroup></oasis:table></table-wrap>

</sec>
<sec id="App1.Ch1.S6.SS2">
  <label>F2</label><title>Experimental Setup</title>
      <p id="d2e5361">The CNN/U-Net and FCNN architectures require fixed-length input representations. As a result, these models cannot be directly applied to dispersion curves with variable sampling densities or period ranges, such as those present in the OpenSWI-real dataset. Consequently, the benchmarking experiments are conducted exclusively on the OpenSWI-shallow and OpenSWI-deep datasets. To ensure a fair comparison across different architectures, several training strategies employed in the main experiments are intentionally simplified. In particular, no additional data augmentation techniques are applied in the benchmarking experiments, including the depth-aware masking strategy and the random noise injection described in the main text.</p>
      <p id="d2e5364">All models are trained using an identical dataset partitioning strategy, consisting of 90 %, 5 %, and 5 % splits for training, validation, and testing, respectively. The evaluation results reported here correspond to the performance on the held-out 5 % test subset. To further ensure consistency, identical optimization settings are adopted for all models. Specifically, the Adam optimizer is used with an initial learning rate of <inline-formula><mml:math id="M275" display="inline"><mml:mrow><mml:mn mathvariant="normal">1</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mn mathvariant="normal">10</mml:mn><mml:mrow><mml:mo>-</mml:mo><mml:mn mathvariant="normal">4</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:math></inline-formula>, combined with a warm-up phase followed by a step-based learning rate decay schedule (StepLR). The maximum number of training epochs is set to 50 for the OpenSWI-shallow dataset and 200 for the OpenSWI-deep dataset. To examine the potential influence of the training objective, two commonly used regression loss functions are considered: mean squared error (MSE) and mean absolute error (MAE). Each network architecture is trained separately using both loss functions under identical training configurations, resulting in six benchmarking experiments (three network architectures combined with two loss functions). For consistency, the evaluation metric reported in the comparison is the root mean square error (RMSE) between the predicted and reference shear-wave velocity profiles on the test dataset.</p>
</sec>
<sec id="App1.Ch1.S6.SS3">
  <label>F3</label><title>Results and Discussion</title>
      <p id="d2e5393">Table <xref ref-type="table" rid="TF1"/> summarizes the benchmarking results obtained using different network architectures and loss functions on the OpenSWI datasets. The results indicate that the Transformer-based architecture consistently achieves the lowest RMSE across both datasets and loss-function settings. The U-Net model exhibits comparable performance on the OpenSWI-shallow dataset but shows larger errors on the more challenging OpenSWI-deep dataset. In contrast, the FCNN model yields relatively higher errors overall, suggesting that its limited representational capacity may restrict its ability to capture the complex nonlinear relationships between dispersion curves and subsurface velocity structures. Regarding the influence of the loss function, the RMSE values obtained using MSE and MAE are generally similar, with only minor variations between the two settings. This observation suggests that the overall inversion performance is primarily governed by the network architecture rather than the specific regression loss used during training.</p>
      <p id="d2e5398">Beyond the quantitative accuracy presented in Table <xref ref-type="table" rid="TF1"/>, an important practical distinction lies in the ability of different architectures to generalize to real observational datasets. The CNN/U-Net and FCNN models require fixed-length input representations and therefore cannot be directly applied to dispersion curves with varying sampling densities or period ranges, such as those encountered in the OpenSWI-real dataset. In contrast, the Transformer-based architecture naturally supports variable-length input sequences and can therefore be applied directly to real observational dispersion curves without additional preprocessing or retraining. These results highlight an important consideration for future deep-learning-based surface-wave inversion methods: in addition to achieving strong performance on synthetic benchmark datasets, inversion models should also possess sufficient flexibility to accommodate dispersion curves with varying period ranges and sampling densities commonly encountered in real-world applications.</p>
</sec>
</app>
  </app-group><notes notes-type="authorcontribution"><title>Author contributions</title>

      <p id="d2e5408">FL contributed to conceptualization, data curation, methodology, software development, formal analysis, writing of the original draft, review and editing of the manuscript, and visualization. SZ contributed to conceptualization, data curation, methodology, formal analysis, and manuscript review and editing. XG contributed to resources, supervision, manuscript review and editing, and visualization. FLi contributed to resources, supervision, and manuscript review and editing. PZ contributed to resources, supervision, and manuscript review and editing. YL contributed to supervision, manuscript review and editing, and project administration. RS and LB contributed to supervision, manuscript review and editing, and funding acquisition. LF, LZ, and JH contributed to manuscript review and editing.</p>
  </notes><notes notes-type="competinginterests"><title>Competing interests</title>

      <p id="d2e5414">The contact author has declared that none of the authors has any competing interests.</p>
  </notes><notes notes-type="disclaimer"><title>Disclaimer</title>

      <p id="d2e5420">Publisher's note: Copernicus Publications remains neutral with regard to jurisdictional claims made in the text, published maps, institutional affiliations, or any other geographical representation in this paper. The authors bear the ultimate responsibility for providing appropriate place names. Views expressed in the text are those of the authors and do not necessarily reflect the views of the publisher.</p>
  </notes><ack><title>Acknowledgements</title><p id="d2e5426">We sincerely thank the Topic Editor Andrea Rovida, the Editor-in-Chief Kirsten Elger, reviewer Filippo Gatti, and one anonymous reviewer for their valuable time and constructive comments, which have significantly improved this study. We also acknowledge the Science Discovery Platform (Intern-Discovery; available at <uri>https://discovery.intern-ai.org.cn</uri>, last access: 18 April 2026) for supporting the testing environment and the Shanghai Artificial Intelligence Laboratory for providing computational resources.</p></ack><notes notes-type="financialsupport"><title>Financial support</title>

      <p id="d2e5434">This research has been supported by the National Natural Science Foundation of China (grant no. 42504129) and the University-Industry Collaborative Education Program (grant no. 2504244609).</p>
  </notes><notes notes-type="reviewstatement"><title>Review statement</title>

      <p id="d2e5440">This paper was edited by Andrea Rovida and reviewed by Filippo Gatti and one anonymous referee.</p>
  </notes><ref-list>
    <title>References</title>

      <ref id="bib1.bibx1"><label>Aleardi and Stucchi(2021)</label><mixed-citation>Aleardi, M. and Stucchi, E.: A Hybrid Residual Neural Network–Monte Carlo Approach to Invert Surface Wave Dispersion Data, Near Surf. Geophys., 19, 397–414, <ext-link xlink:href="https://doi.org/10.1002/nsg.12163" ext-link-type="DOI">10.1002/nsg.12163</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx2"><label>Bensen et al.(2007)Bensen, Ritzwoller, Barmin, Levshin, Lin, Moschetti, Shapiro, and Yang</label><mixed-citation>Bensen, G. D., Ritzwoller, M. H., Barmin, M. P., Levshin, A. L., Lin, F., Moschetti, M. P., Shapiro, N. M., and Yang, Y.: Processing Seismic Ambient Noise Data to Obtain Reliable Broad-Band Surface Wave Dispersion Measurements, Geophys. J. Int., 169, 1239–1260, <ext-link xlink:href="https://doi.org/10.1111/j.1365-246X.2007.03374.x" ext-link-type="DOI">10.1111/j.1365-246X.2007.03374.x</ext-link>, 2007.</mixed-citation></ref>
      <ref id="bib1.bibx3"><label>Berg et al.(2020)Berg, Lin, Allam, Schulte-Pelkum, Ward, and Shen</label><mixed-citation>Berg, E. M., Lin, F.-C., Allam, A., Schulte-Pelkum, V., Ward, K. M., and Shen, W.: Shear Velocity Model of Alaska via Joint Inversion of Rayleigh Wave Ellipticity, Phase Velocities, and Receiver Functions across the Alaska Transportable Array, J. Geophys. Res.-Sol. Ea., 125, <ext-link xlink:href="https://doi.org/10.1029/2019jb018582" ext-link-type="DOI">10.1029/2019jb018582</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx4"><label>Blom et al.(2020)Blom, Gokhberg, and Fichtner</label><mixed-citation>Blom, N., Gokhberg, A., and Fichtner, A.: Seismic waveform tomography of the central and eastern Mediterranean upper mantle, Solid Earth, 11, 669–690, <ext-link xlink:href="https://doi.org/10.5194/se-11-669-2020" ext-link-type="DOI">10.5194/se-11-669-2020</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx5"><label>Brocher(2005)</label><mixed-citation>Brocher, T. M.: Empirical Relations between Elastic Wavespeeds and Density in the Earth's Crust, B. Seismol. Soc. Am., 95, 2081–2092, <ext-link xlink:href="https://doi.org/10.1785/0120050077" ext-link-type="DOI">10.1785/0120050077</ext-link>, 2005.</mixed-citation></ref>
      <ref id="bib1.bibx6"><label>Cai et al.(2022)Cai, Qiu, and Niu</label><mixed-citation>Cai, A., Qiu, H., and Niu, F.: Semi-Supervised Surface Wave Tomography With Wasserstein Cycle-Consistent GAN: Method and Application to Southern California Plate Boundary Region, J. Geophys. Res.-Sol. Ea., 127, e2021JB023598, <ext-link xlink:href="https://doi.org/10.1029/2021JB023598" ext-link-type="DOI">10.1029/2021JB023598</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx7"><label>Cao et al.(2020)Cao, Earp, De Ridder, Curtis, and Galetti</label><mixed-citation>Cao, R., Earp, S., De Ridder, S. A. L., Curtis, A., and Galetti, E.: Near-Real-Time near-Surface 3D Seismic Velocity and Uncertainty Models by Wavefield Gradiometry and Neural Network Inversion of Ambient Seismic Noise, Geophysics, 85, KS13–KS27, <ext-link xlink:href="https://doi.org/10.1190/geo2018-0562.1" ext-link-type="DOI">10.1190/geo2018-0562.1</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx8"><label>Chen et al.(2024)Chen, Xia, Feng, Pang, and Zhang</label><mixed-citation>Chen, X., Xia, J., Feng, J., Pang, J., and Zhang, H.: Surface Wave Inversion Using a Multi-Information Fusion Neural Network, IEEE T. Geosci. Remote, 62, 1–13, <ext-link xlink:href="https://doi.org/10.1109/TGRS.2024.3356663" ext-link-type="DOI">10.1109/TGRS.2024.3356663</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx9"><label>Chen et al.(2025)Chen, Xia, Feng, Cheng, Pang, and Hong</label><mixed-citation>Chen, X., Xia, J., Feng, J., Cheng, F., Pang, J., and Hong, Y.: Why Choose Deep Learning for Surface-Wave Inversion, Surv. Geophys., 46, 695–722, <ext-link xlink:href="https://doi.org/10.1007/s10712-025-09882-y" ext-link-type="DOI">10.1007/s10712-025-09882-y</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx10"><label>Colli et al.(2013)Colli, Fichtner, and Bunge</label><mixed-citation>Colli, L., Fichtner, A., and Bunge, H.-P.: Full Waveform Tomography of the Upper Mantle in the South Atlantic Region: Imaging a Westward Fluxing Shallow Asthenosphere?, Tectonophysics, 604, 26–40, <ext-link xlink:href="https://doi.org/10.1016/j.tecto.2013.06.015" ext-link-type="DOI">10.1016/j.tecto.2013.06.015</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx11"><label>Çubuk-Sabuncu et al.(2017)Çubuk-Sabuncu, Taymaz, and Fichtner</label><mixed-citation>Çubuk-Sabuncu, Y., Taymaz, T., and Fichtner, A.: 3-D Crustal Velocity Structure of Western Turkey: Constraints from Full-Waveform Tomography, Phys. Earth Planet. In., 270, 90–112, <ext-link xlink:href="https://doi.org/10.1016/j.pepi.2017.06.014" ext-link-type="DOI">10.1016/j.pepi.2017.06.014</ext-link>, 2017.</mixed-citation></ref>
      <ref id="bib1.bibx12"><label>Deng et al.(2022)Deng, Feng, Wang, Zhang, Jin, Feng, Zeng, Chen, and Lin</label><mixed-citation>Deng, C., Feng, S., Wang, H., Zhang, X., Jin, P., Feng, Y., Zeng, Q., Chen, Y., and Lin, Y.: OPENFWI: Large-scale multi-structural benchmark datasets for full waveform inversion, Adv. Neur. In., <uri>https://dl.acm.org/doi/10.5555/3600270.3600705</uri> (last access: 18 April 2026), 2022.</mixed-citation></ref>
      <ref id="bib1.bibx13"><label>Feng et al.(2023)Feng, Wang, Deng, Feng, Liu, Zhu, Jin, Chen, and Lin</label><mixed-citation>Feng, S., Wang, H., Deng, C., Feng, Y., Liu, Y., Zhu, M., Jin, P., Chen, Y., and Lin, Y.: EFWI Multiparameter Benchmark Datasets for Elastic Full Waveform Inversion of Geophysical Properties, Adv. Neur. In., 36, 23701–23713, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx14"><label>Fichtner and Villaseñor(2015)</label><mixed-citation>Fichtner, A. and Villaseñor, A.: Crust and Upper Mantle of the Western Mediterranean – Constraints from Full-Waveform Inversion, Earth Planet. Sc. Lett., 428, 52–62, <ext-link xlink:href="https://doi.org/10.1016/j.epsl.2015.07.038" ext-link-type="DOI">10.1016/j.epsl.2015.07.038</ext-link>, 2015.</mixed-citation></ref>
      <ref id="bib1.bibx15"><label>Fichtner et al.(2006)Fichtner, Bunge, and Igel</label><mixed-citation>Fichtner, A., Bunge, H.-P., and Igel, H.: The Adjoint Method in Seismology, Phys. Earth Planet. In., 157, 86–104, <ext-link xlink:href="https://doi.org/10.1016/j.pepi.2006.03.016" ext-link-type="DOI">10.1016/j.pepi.2006.03.016</ext-link>, 2006.</mixed-citation></ref>
      <ref id="bib1.bibx16"><label>Fichtner et al.(2009)Fichtner, Kennett, Igel, and Bunge</label><mixed-citation>Fichtner, A., Kennett, B. L. N., Igel, H., and Bunge, H.-P.: Full Seismic Waveform Tomography for Upper-Mantle Structure in the Australasian Region Using Adjoint Methods, Geophys. J. Int., 179, 1703–1725, <ext-link xlink:href="https://doi.org/10.1111/j.1365-246x.2009.04368.x" ext-link-type="DOI">10.1111/j.1365-246x.2009.04368.x</ext-link>, 2009.</mixed-citation></ref>
      <ref id="bib1.bibx17"><label>Fichtner et al.(2010)Fichtner, Kennett, Igel, and Bunge</label><mixed-citation>Fichtner, A., Kennett, B. L. N., Igel, H., and Bunge, H.-P.: Full Waveform Tomography for Radially Anisotropic Structure: New Insights into Present and Past States of the Australasian Upper Mantle, Earth Planet. Sc. Lett., 290, 270–280, <ext-link xlink:href="https://doi.org/10.1016/j.epsl.2009.12.003" ext-link-type="DOI">10.1016/j.epsl.2009.12.003</ext-link>, 2010.</mixed-citation></ref>
      <ref id="bib1.bibx18"><label>Fichtner et al.(2013)Fichtner, Trampert, Cupillard, Saygin, Taymaz, Capdeville, and Villaseñor</label><mixed-citation>Fichtner, A., Trampert, J., Cupillard, P., Saygin, E., Taymaz, T., Capdeville, Y., and Villaseñor, A.: Multiscale Full Waveform Inversion, Geophys. J. Int., 194, 534–556, <ext-link xlink:href="https://doi.org/10.1093/gji/ggt118" ext-link-type="DOI">10.1093/gji/ggt118</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx19"><label>Fichtner et al.(2018)Fichtner, van Herwaarden, Afanasiev, Simutė, Krischer, Çubuk-Sabuncu, Taymaz, Colli, Saygin, Villaseñor, Trampert, Cupillard, Bunge, and Igel</label><mixed-citation>Fichtner, A., van Herwaarden, D.-P., Afanasiev, M., Simutė, S., Krischer, L., Çubuk-Sabuncu, Y., Taymaz, T., Colli, L., Saygin, E., Villaseñor, A., Trampert, J., Cupillard, P., Bunge, H.-P., and Igel, H.: The Collaborative Seismic Earth Model: Generation 1, Geophys. Res. Lett., 45, 4007–4016, <ext-link xlink:href="https://doi.org/10.1029/2018gl077338" ext-link-type="DOI">10.1029/2018gl077338</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx20"><label>Foti et al.(2009)Foti, Comina, Boiero, and Socco</label><mixed-citation>Foti, S., Comina, C., Boiero, D., and Socco, L.: Non-Uniqueness in Surface-Wave Inversion and Consequences on Seismic Site Response Analyses, Soil Dyn. Earthq. Eng., 29, 982–993, <ext-link xlink:href="https://doi.org/10.1016/j.soildyn.2008.11.004" ext-link-type="DOI">10.1016/j.soildyn.2008.11.004</ext-link>, 2009.</mixed-citation></ref>
      <ref id="bib1.bibx21"><label>Foti et al.(2014)Foti, Lai, Rix, and Strobbia</label><mixed-citation>Foti, S., Lai, C., Rix, G. J., and Strobbia, C.: Surface Wave Methods for Near-Surface Site Characterization, CRC Press, ISBN 978-0-429-17853-5, <ext-link xlink:href="https://doi.org/10.1201/b17268" ext-link-type="DOI">10.1201/b17268</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx22"><label>Fu et al.(2022)Fu, Pan, Li, Dong, Ma, and Chen</label><mixed-citation>Fu, L., Pan, L., Li, Z., Dong, S., Ma, Q., and Chen, X.: Improved High-resolution 3D Vs Model of Long Beach, CA: Inversion of Multimodal Dispersion Curves from Ambient Noise of a Dense Array, Geophys. Res. Lett., 49, e2021GL097619, <ext-link xlink:href="https://doi.org/10.1029/2021GL097619" ext-link-type="DOI">10.1029/2021GL097619</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx23"><label>Gan et al.(2024)Gan, Yang, Pan, Sun, Zhang, Gao, and Chen</label><mixed-citation>Gan, Y., Yang, Z., Pan, L., Sun, Y.-C., Zhang, D., Gao, Y., and Chen, X.: Deep Learning-Based Dispersion Spectrum Inversion for Surface Wave Exploration, IEEE T. Geosci. Remote, 62, 1–11, <ext-link xlink:href="https://doi.org/10.1109/TGRS.2024.3399033" ext-link-type="DOI">10.1109/TGRS.2024.3399033</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx24"><label>Gao et al.(2025)Gao, Wu, Sun, Hou, Gao, Wang, and Sheng</label><mixed-citation>Gao, H., Wu, X., Sun, X., Hou, M., Gao, H., Wang, G., and Sheng, H.: cigFacies: a massive-scale benchmark dataset of seismic facies and its application, Earth Syst. Sci. Data, 17, 595–609, <ext-link xlink:href="https://doi.org/10.5194/essd-17-595-2025" ext-link-type="DOI">10.5194/essd-17-595-2025</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx25"><label>Haskell(1953)</label><mixed-citation>Haskell, N. A.: The Dispersion of Surface Waves on Multilayered Media, in: Vincit Veritas: A Portrait of the Life and Work of Norman Abraham Haskell, 1905–1970, edited by Ben-Menahem, A., vol. 43, American Geophysical Union, Washington, D. C., 86–103, ISBN 978-0-87590-762-8, <ext-link xlink:href="https://doi.org/10.1785/BSSA0430010017" ext-link-type="DOI">10.1785/BSSA0430010017</ext-link>, 1953.</mixed-citation></ref>
      <ref id="bib1.bibx26"><label>Herrmann(2013)</label><mixed-citation>Herrmann, R. B.: Computer Programs in Seismology: An Evolving Tool for Instruction and Research, Seismol. Res. Lett., 84, 1081–1088, <ext-link xlink:href="https://doi.org/10.1785/0220110096" ext-link-type="DOI">10.1785/0220110096</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx27"><label>Ho et al.(2020)Ho, Jain, and Abbeel</label><mixed-citation>Ho, J., Jain, A., and Abbeel, P.: Denoising Diffusion Probabilistic Models, arXiv [preprint], <ext-link xlink:href="https://doi.org/10.48550/arXiv.2006.11239" ext-link-type="DOI">10.48550/arXiv.2006.11239</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx28"><label>Hu et al.(2020)Hu, Qiu, Zhang, and Ben-Zion</label><mixed-citation>Hu, J., Qiu, H., Zhang, H., and Ben-Zion, Y.: Using Deep Learning to Derive Shear-Wave Velocity Models from Surface-Wave Dispersion Data, Seismol. Res. Lett., 91, 1738–1751, <ext-link xlink:href="https://doi.org/10.1785/0220190222" ext-link-type="DOI">10.1785/0220190222</ext-link>, 2020.</mixed-citation></ref>
      <ref id="bib1.bibx29"><label>Huang et al.(2024)Huang, Yu, Wang, and Wang</label><mixed-citation>Huang, X., Yu, Z., Wang, W., and Wang, F.: JointNet: A Multimodal Deep Learning-Based Approach for Joint Inversion of Rayleigh Wave Dispersion and Ellipticity, B. Seismol. Soc. Am., 114, 627–641, <ext-link xlink:href="https://doi.org/10.1785/0120230199" ext-link-type="DOI">10.1785/0120230199</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx30"><label>Jiang et al.(2025)Jiang, Ma, Ning, Li, Wu, and Bao</label><mixed-citation>Jiang, Y., Ma, J., Ning, J., Li, J., Wu, H., and Bao, T.: One-Fit-All Transformer for Multimodal Geophysical Inversion: Method and Application, Journal of Geophysical Research: Machine Learning and Computation, 2, e2024JH000432, <ext-link xlink:href="https://doi.org/10.1029/2024JH000432" ext-link-type="DOI">10.1029/2024JH000432</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx31"><label>Krischer et al.(2018)Krischer, Fichtner, Boehm, and Igel</label><mixed-citation>Krischer, L., Fichtner, A., Boehm, C., and Igel, H.: Automated Large-scale Full Seismic Waveform Inversion for North America and the North Atlantic, J. Geophys. Res.-Sol. Ea., 123, 5902–5928, <ext-link xlink:href="https://doi.org/10.1029/2017JB015289" ext-link-type="DOI">10.1029/2017JB015289</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx32"><label>Lehmann et al.(2024)Lehmann, Gatti, Bertin, and Clouteau</label><mixed-citation>Lehmann, F., Gatti, F., Bertin, M., and Clouteau, D.: Synthetic ground motions in heterogeneous geologies from various sources: the HEMEWS-3D database, Earth Syst. Sci. Data, 16, 3949–3972, <ext-link xlink:href="https://doi.org/10.5194/essd-16-3949-2024" ext-link-type="DOI">10.5194/essd-16-3949-2024</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx33"><label>Lin et al.(2013)Lin, Li, Clayton, and Hollis</label><mixed-citation>Lin, F.-C., Li, D., Clayton, R. W., and Hollis, D.: High-Resolution 3D Shallow Crustal Structure in Long Beach, California: Application of Ambient Noise Tomography on a Dense Seismic Array, Geophysics, 78, Q45–Q56, <ext-link xlink:href="https://doi.org/10.1190/geo2012-0453.1" ext-link-type="DOI">10.1190/geo2012-0453.1</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx34"><label>Liu(2025a)</label><mixed-citation>Liu, F.: OpenSWI-dataset, Zenodo [data set], <ext-link xlink:href="https://doi.org/10.5281/zenodo.16874111" ext-link-type="DOI">10.5281/zenodo.16874111</ext-link>, 2025a.</mixed-citation></ref>
      <ref id="bib1.bibx35"><label>Liu(2025b)</label><mixed-citation>Liu, F.: OpenSWI-toolbox, Zenodo [code], <ext-link xlink:href="https://doi.org/10.5281/zenodo.16884901" ext-link-type="DOI">10.5281/zenodo.16884901</ext-link>, 2025b.</mixed-citation></ref>
      <ref id="bib1.bibx36"><label>Liu et al.(2024)Liu, Li, Fu, and Lu</label><mixed-citation>Liu, F., Li, J., Fu, L., and Lu, L.: Multimodal Surface Wave Inversion with Automatic Differentiation, Geophys. J. Int., 238, 290–312, <ext-link xlink:href="https://doi.org/10.1093/gji/ggae155" ext-link-type="DOI">10.1093/gji/ggae155</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx37"><label>Liu et al.(2025)Liu, Deng, Su, Bai, and Ouyang</label><mixed-citation>Liu, F., Deng, B., Su, R., Bai, L., and Ouyang, W.: DispFormer: Pretrained Transformer for Flexible Dispersion Curve Inversion from Global Synthesis to Regional Applications, arXiv [preprint], <ext-link xlink:href="https://doi.org/10.48550/ARXIV.2501.04366" ext-link-type="DOI">10.48550/ARXIV.2501.04366</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx38"><label>Lu et al.(2018)Lu, Stehly, Paul, and AlpArray Working Group</label><mixed-citation>Lu, Y., Stehly, L., Paul, A., and AlpArray Working Group: High-Resolution Surface Wave Tomography of the European Crust and Uppermost Mantle from Ambient Seismic Noise, Geophys. J. Int., 214, 1136–1150, <ext-link xlink:href="https://doi.org/10.1093/gji/ggy188" ext-link-type="DOI">10.1093/gji/ggy188</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx39"><label>Luo et al.(2022)Luo, Huang, Yang, Zhao, Yang, and Xu</label><mixed-citation>Luo, Y., Huang, Y., Yang, Y., Zhao, K., Yang, X., and Xu, H.: Constructing Shear Velocity Models from Surface Wave Dispersion Curves Using Deep Learning, J. Appl. Geophys., 196, 104524, <ext-link xlink:href="https://doi.org/10.1016/j.jappgeo.2021.104524" ext-link-type="DOI">10.1016/j.jappgeo.2021.104524</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx40"><label>Merrifield et al.(2022)Merrifield, Griffith, Zamanian, Gesbert, Sen, De La Torre Guzman, Potter, and Kuehl</label><mixed-citation>Merrifield, T. P., Griffith, D. P., Zamanian, S. A., Gesbert, S., Sen, S., De La Torre Guzman, J., Potter, R. D., and Kuehl, H.: Synthetic Seismic Data for Training Deep Learning Networks, Interpretation, 10, SE31–SE39, <ext-link xlink:href="https://doi.org/10.1190/INT-2021-0193.1" ext-link-type="DOI">10.1190/INT-2021-0193.1</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx41"><label>Michelini et al.(2021)Michelini, Cianetti, Gaviano, Giunchi, Jozinović, and Lauciani</label><mixed-citation>Michelini, A., Cianetti, S., Gaviano, S., Giunchi, C., Jozinović, D., and Lauciani, V.: INSTANCE – the Italian seismic dataset for machine learning, Earth Syst. Sci. Data, 13, 5509–5544, <ext-link xlink:href="https://doi.org/10.5194/essd-13-5509-2021" ext-link-type="DOI">10.5194/essd-13-5509-2021</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx42"><label>Mousavi et al.(2019)Mousavi, Sheng, Zhu, and Beroza</label><mixed-citation>Mousavi, S. M., Sheng, Y., Zhu, W., and Beroza, G. C.: STanford EArthquake Dataset (STEAD): A Global Data Set of Seismic Signals for AI, IEEE Access, 7, 179464–179476, <ext-link xlink:href="https://doi.org/10.1109/ACCESS.2019.2947848" ext-link-type="DOI">10.1109/ACCESS.2019.2947848</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx43"><label>Park et al.(1999)Park, Miller, and Xia</label><mixed-citation>Park, C. B., Miller, R. D., and Xia, J.: Multichannel Analysis of Surface Waves, Geophysics, 64, 800–808, <ext-link xlink:href="https://doi.org/10.1190/1.1444590" ext-link-type="DOI">10.1190/1.1444590</ext-link>, 1999.</mixed-citation></ref>
      <ref id="bib1.bibx44"><label>Pasyanos et al.(2014)Pasyanos, Masters, Laske, and Ma</label><mixed-citation>Pasyanos, M. E., Masters, T. G., Laske, G., and Ma, Z.: LITHO1.0: An Updated Crust and Lithospheric Model of the Earth, J. Geophys. Res.-Sol. Ea., 119, 2153–2173, <ext-link xlink:href="https://doi.org/10.1002/2013JB010626" ext-link-type="DOI">10.1002/2013JB010626</ext-link>, 2014.</mixed-citation></ref>
      <ref id="bib1.bibx45"><label>Reid et al.(2025)Reid, Olivier, and Jones</label><mixed-citation>Reid, A., Olivier, G., and Jones, T.: Ambient Noise Tomography: A Sensitive, Rapid, Passive Seismic Technique for Mineral Exploration, SEG Discovery, 17–26, <ext-link xlink:href="https://doi.org/10.5382/SEGnews.2025-140.fea-01" ext-link-type="DOI">10.5382/SEGnews.2025-140.fea-01</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx46"><label>Rickers et al.(2013)Rickers, Fichtner, and Trampert</label><mixed-citation>Rickers, F., Fichtner, A., and Trampert, J.: The Iceland–Jan Mayen Plume System and Its Impact on Mantle Dynamics in the North Atlantic Region: Evidence from Full-Waveform Inversion, Earth Planet. Sc. Lett., 367, 39–51, <ext-link xlink:href="https://doi.org/10.1016/j.epsl.2013.02.022" ext-link-type="DOI">10.1016/j.epsl.2013.02.022</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx47"><label>Shapiro and Campillo(2004)</label><mixed-citation>Shapiro, N. M. and Campillo, M.: Emergence of Broadband Rayleigh Waves from Correlations of the Ambient Seismic Noise, Geophys. Res. Lett., 31, 2004GL019491, <ext-link xlink:href="https://doi.org/10.1029/2004GL019491" ext-link-type="DOI">10.1029/2004GL019491</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bibx48"><label>Shapiro and Ritzwoller(2002)</label><mixed-citation>Shapiro, N. M. and Ritzwoller, M. H.: Monte-Carlo Inversion for a Global Shear-Velocity Model of the Crust and Upper Mantle, Geophys. J. Int., 151, 88–105, <ext-link xlink:href="https://doi.org/10.1046/j.1365-246X.2002.01742.x" ext-link-type="DOI">10.1046/j.1365-246X.2002.01742.x</ext-link>, 2002.</mixed-citation></ref>
      <ref id="bib1.bibx49"><label>Shen et al.(2013)Shen, Ritzwoller, and Schulte-Pelkum</label><mixed-citation>Shen, W., Ritzwoller, M. H., and Schulte-Pelkum, V.: A 3-D Model of the Crust and Uppermost Mantle beneath the Central and Western US by Joint Inversion of Receiver Functions and Surface Wave Dispersion, J. Geophys. Res.-Sol. Ea., 118, 262–276, <ext-link xlink:href="https://doi.org/10.1029/2012JB009602" ext-link-type="DOI">10.1029/2012JB009602</ext-link>, 2013.</mixed-citation></ref>
      <ref id="bib1.bibx50"><label>Shen et al.(2016)Shen, Ritzwoller, Kang, Kim, Lin, Ning, Wang, Zheng, and Zhou</label><mixed-citation>Shen, W., Ritzwoller, M. H., Kang, D., Kim, Y., Lin, F.-C., Ning, J., Wang, W., Zheng, Y., and Zhou, L.: A Seismic Reference Model for the Crust and Uppermost Mantle beneath China from Surface Wave Dispersion, Geophys. J. Int., 206, 954–979, <ext-link xlink:href="https://doi.org/10.1093/gji/ggw175" ext-link-type="DOI">10.1093/gji/ggw175</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bibx51"><label>Simutė et al.(2016)Simutė, Steptoe, Cobden, Gokhberg, and Fichtner</label><mixed-citation>Simutė, S., Steptoe, H., Cobden, L., Gokhberg, A., and Fichtner, A.: Full-waveform inversion of the Japanese islands region, J. Geophys. Res.-Sol. Ea., 121, 3722–3741, <ext-link xlink:href="https://doi.org/10.1002/2016jb012802" ext-link-type="DOI">10.1002/2016jb012802</ext-link>, 2016.</mixed-citation></ref>
      <ref id="bib1.bibx52"><label>Socco and Strobbia(2004)</label><mixed-citation>Socco, L. and Strobbia, C.: Surface-wave Method for Near-surface Characterization: A Tutorial, Near Surf. Geophys., 2, 165–185, <ext-link xlink:href="https://doi.org/10.3997/1873-0604.2004015" ext-link-type="DOI">10.3997/1873-0604.2004015</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bibx53"><label>Taufik et al.(2024)Taufik, Wang, and Alkhalifah</label><mixed-citation>Taufik, M. H., Wang, F., and Alkhalifah, T.: Learned Regularizations for Multi-Parameter Elastic Full Waveform Inversion Using Diffusion Models, Journal of Geophysical Research: Machine Learning and Computation, 1, e2024JH000125, <ext-link xlink:href="https://doi.org/10.1029/2024JH000125" ext-link-type="DOI">10.1029/2024JH000125</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx54"><label>Thomson(1950)</label><mixed-citation>Thomson, W. T.: Transmission of Elastic Waves through a Stratified Solid Medium, J. Appl. Phys., 21, 89–93, <ext-link xlink:href="https://doi.org/10.1063/1.1699629" ext-link-type="DOI">10.1063/1.1699629</ext-link>, 1950.</mixed-citation></ref>
      <ref id="bib1.bibx55"><label>Wang et al.(2022)Wang, Song, and Li</label><mixed-citation>Wang, F., Song, X., and Li, J.: Deep Learning-Based <inline-formula><mml:math id="M276" display="inline"><mml:mrow><mml:mi>H</mml:mi><mml:mtext>-</mml:mtext><mml:mi mathvariant="italic">κ</mml:mi></mml:mrow></mml:math></inline-formula> Method (HkNet) for Estimating Crustal Thickness and <inline-formula><mml:math id="M277" display="inline"><mml:mrow><mml:mi>V</mml:mi><mml:mi>p</mml:mi><mml:mo>/</mml:mo><mml:mi>V</mml:mi><mml:mi>s</mml:mi></mml:mrow></mml:math></inline-formula> Ratio From Receiver Functions, J. Geophys. Res.-Sol. Ea., 127, e2022JB023944, <ext-link xlink:href="https://doi.org/10.1029/2022JB023944" ext-link-type="DOI">10.1029/2022JB023944</ext-link>, 2022.</mixed-citation></ref>
      <ref id="bib1.bibx56"><label>Wang et al.(2023a)Wang, Huang, and Alkhalifah</label><mixed-citation>Wang, F., Huang, X., and Alkhalifah, T. A.: A Prior Regularized Full Waveform Inversion Using Generative Diffusion Models, IEEE T. Geosci. Remote, 61, 1–11, <ext-link xlink:href="https://doi.org/10.1109/tgrs.2023.3337014" ext-link-type="DOI">10.1109/tgrs.2023.3337014</ext-link>, 2023a.</mixed-citation></ref>
      <ref id="bib1.bibx57"><label>Wang et al.(2023b)Wang, Song, and Li</label><mixed-citation>Wang, F., Song, X., and Li, M.: A Deep-Learning-Based Approach for Seismic Surface-Wave Dispersion Inversion (SfNet) with Application to the Chinese Mainland, Earthquake Science, 36, 147–168, <ext-link xlink:href="https://doi.org/10.1016/j.eqs.2023.02.007" ext-link-type="DOI">10.1016/j.eqs.2023.02.007</ext-link>, 2023b.</mixed-citation></ref>
      <ref id="bib1.bibx58"><label>Wang et al.(2025)Wang, Wu, and Zhang</label><mixed-citation>Wang, G., Wu, X., and Zhang, W.: cigChannel: a large-scale 3D seismic dataset with labeled paleochannels for advancing deep learning in seismic interpretation, Earth Syst. Sci. Data, 17, 3447–3471, <ext-link xlink:href="https://doi.org/10.5194/essd-17-3447-2025" ext-link-type="DOI">10.5194/essd-17-3447-2025</ext-link>, 2025.</mixed-citation></ref>
      <ref id="bib1.bibx59"><label>Wathelet et al.(2004)Wathelet, Jongmans, and Ohrnberger</label><mixed-citation>Wathelet, M., Jongmans, D., and Ohrnberger, M.: Surface-wave Inversion Using a Direct Search Algorithm and Its Application to Ambient Vibration Measurements, Near Surf. Geophys., 2, 211–221, <ext-link xlink:href="https://doi.org/10.3997/1873-0604.2004018" ext-link-type="DOI">10.3997/1873-0604.2004018</ext-link>, 2004.</mixed-citation></ref>
      <ref id="bib1.bibx60"><label>Wen and Yu(2023)</label><mixed-citation>Wen, L. and Yu, S.: The China Seismological Reference Model Project, Earth and Planetary Physics, 7, 521–532, <ext-link xlink:href="https://doi.org/10.26464/epp2023078" ext-link-type="DOI">10.26464/epp2023078</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx61"><label>Xia et al.(1999)Xia, Miller, and Park</label><mixed-citation>Xia, J., Miller, R. D., and Park, C. B.: Estimation of Near-surface Shear-wave Velocity by Inversion of Rayleigh Waves, Geophysics, 64, 691–700, <ext-link xlink:href="https://doi.org/10.1190/1.1444578" ext-link-type="DOI">10.1190/1.1444578</ext-link>, 1999.</mixed-citation></ref>
      <ref id="bib1.bibx62"><label>Xiao et al.(2024)Xiao, Cheng, Wu, Wang, Sun, Wang, Ma, Tong, Liang, Tian, Li, Chen, Yu, and Wen</label><mixed-citation>Xiao, X., Cheng, S., Wu, J., Wang, W., Sun, L., Wang, X., Ma, J., Tong, Y., Liang, X., Tian, X., Li, H., Chen, Q.-F., Yu, S., and Wen, L.: CSRM-1.0: A China Seismological Reference Model, J. Geophys. Res.-Sol. Ea., 129, e2024JB029520, <ext-link xlink:href="https://doi.org/10.1029/2024JB029520" ext-link-type="DOI">10.1029/2024JB029520</ext-link>, 2024.</mixed-citation></ref>
      <ref id="bib1.bibx63"><label>Xie et al.(2018)Xie, Chu, and Yang</label><mixed-citation>Xie, J., Chu, R., and Yang, Y.: 3-D Upper-Mantle Shear Velocity Model beneath the Contiguous United States Based on Broadband Surface Wave from Ambient Seismic Noise, Pure Appl. Geophys., 175, 3403–3418, <ext-link xlink:href="https://doi.org/10.1007/s00024-018-1881-2" ext-link-type="DOI">10.1007/s00024-018-1881-2</ext-link>, 2018.</mixed-citation></ref>
      <ref id="bib1.bibx64"><label>Xin et al.(2019)Xin, Zhang, Kang, He, Gao, and Gao</label><mixed-citation>Xin, H., Zhang, H., Kang, M., He, R., Gao, L., and Gao, J.: High-resolution Lithospheric Velocity Structure of Continental China by Double-difference Seismic Travel-time Tomography, Seismol. Res. Lett., 90, 229–241, <ext-link xlink:href="https://doi.org/10.1785/0220180209" ext-link-type="DOI">10.1785/0220180209</ext-link>, 2019.</mixed-citation></ref>
      <ref id="bib1.bibx65"><label>Yablokov et al.(2023)Yablokov, Lugovtsova, and Serdyukov</label><mixed-citation>Yablokov, A., Lugovtsova, Y., and Serdyukov, A.: Uncertainty Quantification of Multimodal Surface Wave Inversion Using Artificial Neural Networks, Geophysics, 88, KS1–KS11, <ext-link xlink:href="https://doi.org/10.1190/geo2022-0261.1" ext-link-type="DOI">10.1190/geo2022-0261.1</ext-link>, 2023.</mixed-citation></ref>
      <ref id="bib1.bibx66"><label>Yablokov et al.(2021)Yablokov, Serdyukov, Loginov, and Baranov</label><mixed-citation>Yablokov, A. V., Serdyukov, A. S., Loginov, G. N., and Baranov, V. D.: An Artificial Neural Network Approach for the Inversion of Surface Wave Dispersion Curves, Geophys. Prospect., 69, 1405–1432, <ext-link xlink:href="https://doi.org/10.1111/1365-2478.13107" ext-link-type="DOI">10.1111/1365-2478.13107</ext-link>, 2021.</mixed-citation></ref>
      <ref id="bib1.bibx67"><label>Yang and Ritzwoller(2008)</label><mixed-citation>Yang, Y. and Ritzwoller, M. H.: Characteristics of Ambient Seismic Noise as a Source for Surface Wave Tomography, Geochem. Geophy. Geosy., 9, 2007GC001814, <ext-link xlink:href="https://doi.org/10.1029/2007GC001814" ext-link-type="DOI">10.1029/2007GC001814</ext-link>, 2008.</mixed-citation></ref>

  </ref-list></back>
    <!--<article-title-html>OpenSWI: a massive-scale benchmark dataset for surface wave dispersion curve inversion</article-title-html>
<abstract-html/>
<ref-html id="bib1.bib1"><label>Aleardi and Stucchi(2021)</label><mixed-citation>
      
Aleardi, M. and Stucchi, E.: A Hybrid Residual Neural Network–Monte Carlo
Approach to Invert Surface Wave Dispersion Data, Near Surf. Geophys., 19,
397–414, <a href="https://doi.org/10.1002/nsg.12163" target="_blank">https://doi.org/10.1002/nsg.12163</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib2"><label>Bensen et al.(2007)Bensen, Ritzwoller, Barmin, Levshin, Lin,
Moschetti, Shapiro, and Yang</label><mixed-citation>
      
Bensen, G. D., Ritzwoller, M. H., Barmin, M. P., Levshin, A. L., Lin, F.,
Moschetti, M. P., Shapiro, N. M., and Yang, Y.: Processing Seismic Ambient
Noise Data to Obtain Reliable Broad-Band Surface Wave Dispersion
Measurements, Geophys. J. Int., 169, 1239–1260,
<a href="https://doi.org/10.1111/j.1365-246X.2007.03374.x" target="_blank">https://doi.org/10.1111/j.1365-246X.2007.03374.x</a>, 2007.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib3"><label>Berg et al.(2020)Berg, Lin, Allam, Schulte-Pelkum, Ward, and
Shen</label><mixed-citation>
      
Berg, E. M., Lin, F.-C., Allam, A., Schulte-Pelkum, V., Ward, K. M., and Shen,
W.: Shear Velocity Model of Alaska via Joint Inversion of Rayleigh Wave
Ellipticity, Phase Velocities, and Receiver Functions across the Alaska
Transportable Array, J. Geophys. Res.-Sol. Ea., 125,
<a href="https://doi.org/10.1029/2019jb018582" target="_blank">https://doi.org/10.1029/2019jb018582</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib4"><label>Blom et al.(2020)Blom, Gokhberg, and Fichtner</label><mixed-citation>
      
Blom, N., Gokhberg, A., and Fichtner, A.: Seismic waveform tomography of the central and eastern Mediterranean upper mantle, Solid Earth, 11, 669–690, <a href="https://doi.org/10.5194/se-11-669-2020" target="_blank">https://doi.org/10.5194/se-11-669-2020</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib5"><label>Brocher(2005)</label><mixed-citation>
      
Brocher, T. M.: Empirical Relations between Elastic Wavespeeds and Density in
the Earth's Crust, B. Seismol. Soc. Am., 95,
2081–2092, <a href="https://doi.org/10.1785/0120050077" target="_blank">https://doi.org/10.1785/0120050077</a>, 2005.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib6"><label>Cai et al.(2022)Cai, Qiu, and Niu</label><mixed-citation>
      
Cai, A., Qiu, H., and Niu, F.: Semi-Supervised Surface Wave Tomography With
Wasserstein Cycle-Consistent GAN: Method and Application to
Southern California Plate Boundary Region, J. Geophys. Res.-Sol. Ea., 127, e2021JB023598, <a href="https://doi.org/10.1029/2021JB023598" target="_blank">https://doi.org/10.1029/2021JB023598</a>,
2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib7"><label>Cao et al.(2020)Cao, Earp, De Ridder, Curtis, and
Galetti</label><mixed-citation>
      
Cao, R., Earp, S., De Ridder, S. A. L., Curtis, A., and Galetti, E.:
Near-Real-Time near-Surface 3D Seismic Velocity and Uncertainty Models by
Wavefield Gradiometry and Neural Network Inversion of Ambient Seismic Noise,
Geophysics, 85, KS13–KS27, <a href="https://doi.org/10.1190/geo2018-0562.1" target="_blank">https://doi.org/10.1190/geo2018-0562.1</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib8"><label>Chen et al.(2024)Chen, Xia, Feng, Pang, and
Zhang</label><mixed-citation>
      
Chen, X., Xia, J., Feng, J., Pang, J., and Zhang, H.: Surface Wave Inversion
Using a Multi-Information Fusion Neural Network, IEEE T.
Geosci. Remote, 62, 1–13, <a href="https://doi.org/10.1109/TGRS.2024.3356663" target="_blank">https://doi.org/10.1109/TGRS.2024.3356663</a>,
2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib9"><label>Chen et al.(2025)Chen, Xia, Feng, Cheng, Pang, and
Hong</label><mixed-citation>
      
Chen, X., Xia, J., Feng, J., Cheng, F., Pang, J., and Hong, Y.: Why Choose
Deep Learning for Surface-Wave Inversion, Surv. Geophys., 46,
695–722, <a href="https://doi.org/10.1007/s10712-025-09882-y" target="_blank">https://doi.org/10.1007/s10712-025-09882-y</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib10"><label>Colli et al.(2013)Colli, Fichtner, and Bunge</label><mixed-citation>
      
Colli, L., Fichtner, A., and Bunge, H.-P.: Full Waveform Tomography of the
Upper Mantle in the South Atlantic Region: Imaging a Westward Fluxing Shallow
Asthenosphere?, Tectonophysics, 604, 26–40,
<a href="https://doi.org/10.1016/j.tecto.2013.06.015" target="_blank">https://doi.org/10.1016/j.tecto.2013.06.015</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib11"><label>Çubuk-Sabuncu et al.(2017)Çubuk-Sabuncu, Taymaz, and
Fichtner</label><mixed-citation>
      
Çubuk-Sabuncu, Y., Taymaz, T., and Fichtner, A.: 3-D Crustal Velocity
Structure of Western Turkey: Constraints from Full-Waveform Tomography,
Phys. Earth Planet. In., 270, 90–112,
<a href="https://doi.org/10.1016/j.pepi.2017.06.014" target="_blank">https://doi.org/10.1016/j.pepi.2017.06.014</a>, 2017.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib12"><label>Deng et al.(2022)Deng, Feng, Wang, Zhang, Jin, Feng, Zeng, Chen, and
Lin</label><mixed-citation>
      
Deng, C., Feng, S., Wang, H., Zhang, X., Jin, P., Feng, Y., Zeng, Q., Chen, Y., and Lin, Y.: OPENFWI: Large-scale multi-structural benchmark datasets for full waveform inversion, Adv. Neur. In., <a href="https://dl.acm.org/doi/10.5555/3600270.3600705" target="_blank">https://dl.acm.org/doi/10.5555/3600270.3600705</a> (last access: 18 April 2026), 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib13"><label>Feng et al.(2023)Feng, Wang, Deng, Feng, Liu, Zhu, Jin, Chen, and
Lin</label><mixed-citation>
      
Feng, S., Wang, H., Deng, C., Feng, Y., Liu, Y., Zhu, M., Jin, P., Chen, Y.,
and Lin, Y.: EFWI Multiparameter Benchmark Datasets for Elastic Full
Waveform Inversion of Geophysical Properties, Adv. Neur. In., 36, 23701–23713, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib14"><label>Fichtner and Villaseñor(2015)</label><mixed-citation>
      
Fichtner, A. and Villaseñor, A.: Crust and Upper Mantle of the Western
Mediterranean – Constraints from Full-Waveform Inversion, Earth
Planet. Sc. Lett., 428, 52–62, <a href="https://doi.org/10.1016/j.epsl.2015.07.038" target="_blank">https://doi.org/10.1016/j.epsl.2015.07.038</a>,
2015.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib15"><label>Fichtner et al.(2006)Fichtner, Bunge, and
Igel</label><mixed-citation>
      
Fichtner, A., Bunge, H.-P., and Igel, H.: The Adjoint Method in Seismology,
Phys. Earth Planet. In., 157, 86–104,
<a href="https://doi.org/10.1016/j.pepi.2006.03.016" target="_blank">https://doi.org/10.1016/j.pepi.2006.03.016</a>, 2006.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib16"><label>Fichtner et al.(2009)Fichtner, Kennett, Igel, and
Bunge</label><mixed-citation>
      
Fichtner, A., Kennett, B. L. N., Igel, H., and Bunge, H.-P.: Full Seismic
Waveform Tomography for Upper-Mantle Structure in the Australasian Region
Using Adjoint Methods, Geophys. J. Int., 179, 1703–1725,
<a href="https://doi.org/10.1111/j.1365-246x.2009.04368.x" target="_blank">https://doi.org/10.1111/j.1365-246x.2009.04368.x</a>, 2009.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib17"><label>Fichtner et al.(2010)Fichtner, Kennett, Igel, and
Bunge</label><mixed-citation>
      
Fichtner, A., Kennett, B. L., Igel, H., and Bunge, H.-P.: Full Waveform
Tomography for Radially Anisotropic Structure: New Insights into Present and
Past States of the Australasian Upper Mantle, Earth Planet. Sc.
Lett., 290, 270–280, <a href="https://doi.org/10.1016/j.epsl.2009.12.003" target="_blank">https://doi.org/10.1016/j.epsl.2009.12.003</a>, 2010.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib18"><label>Fichtner et al.(2013)Fichtner, Trampert, Cupillard, Saygin, Taymaz,
Capdeville, and Villaseñor</label><mixed-citation>
      
Fichtner, A., Trampert, J., Cupillard, P., Saygin, E., Taymaz, T., Capdeville,
Y., and Villaseñor, A.: Multiscale Full Waveform Inversion, Geophys. J. Int., 194, 534–556, <a href="https://doi.org/10.1093/gji/ggt118" target="_blank">https://doi.org/10.1093/gji/ggt118</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib19"><label>Fichtner et al.(2018)Fichtner, van Herwaarden, Afanasiev,
Simutė, Krischer, Çubuk-Sabuncu, Taymaz, Colli, Saygin,
Villaseñor, Trampert, Cupillard, Bunge, and
Igel</label><mixed-citation>
      
Fichtner, A., van Herwaarden, D.-P., Afanasiev, M., Simutė, S., Krischer,
L., Çubuk-Sabuncu, Y., Taymaz, T., Colli, L., Saygin, E.,
Villaseñor, A., Trampert, J., Cupillard, P., Bunge, H.-P., and Igel, H.:
The Collaborative Seismic Earth Model: Generation 1, Geophys. Res.
Lett., 45, 4007–4016, <a href="https://doi.org/10.1029/2018gl077338" target="_blank">https://doi.org/10.1029/2018gl077338</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib20"><label>Foti et al.(2009)Foti, Comina, Boiero, and
Socco</label><mixed-citation>
      
Foti, S., Comina, C., Boiero, D., and Socco, L.: Non-Uniqueness in Surface-Wave
Inversion and Consequences on Seismic Site Response Analyses, Soil Dyn.
Earthq. Eng., 29, 982–993,
<a href="https://doi.org/10.1016/j.soildyn.2008.11.004" target="_blank">https://doi.org/10.1016/j.soildyn.2008.11.004</a>, 2009.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib21"><label>Foti et al.(2014)Foti, Lai, Rix, and Strobbia</label><mixed-citation>
      
Foti, S., Lai, C., Rix, G. J., and Strobbia, C.: Surface Wave Methods for
Near-Surface Site Characterization, CRC Press, ISBN
978-0-429-17853-5, <a href="https://doi.org/10.1201/b17268" target="_blank">https://doi.org/10.1201/b17268</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib22"><label>Fu et al.(2022)Fu, Pan, Li, Dong, Ma, and Chen</label><mixed-citation>
      
Fu, L., Pan, L., Li, Z., Dong, S., Ma, Q., and Chen, X.: Improved
High-resolution 3D Vs Model of Long Beach, CA: Inversion of
Multimodal Dispersion Curves from Ambient Noise of a Dense Array, Geophys.
Res. Lett., 49, e2021GL097619, <a href="https://doi.org/10.1029/2021GL097619" target="_blank">https://doi.org/10.1029/2021GL097619</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib23"><label>Gan et al.(2024)Gan, Yang, Pan, Sun, Zhang, Gao, and
Chen</label><mixed-citation>
      
Gan, Y., Yang, Z., Pan, L., Sun, Y.-C., Zhang, D., Gao, Y., and Chen, X.: Deep
Learning-Based Dispersion Spectrum Inversion for Surface Wave
Exploration, IEEE T. Geosci. Remote, 62, 1–11,
<a href="https://doi.org/10.1109/TGRS.2024.3399033" target="_blank">https://doi.org/10.1109/TGRS.2024.3399033</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib24"><label>Gao et al.(2025)Gao, Wu, Sun, Hou, Gao, Wang, and
Sheng</label><mixed-citation>
      
Gao, H., Wu, X., Sun, X., Hou, M., Gao, H., Wang, G., and Sheng, H.: cigFacies: a massive-scale benchmark dataset of seismic facies and its application, Earth Syst. Sci. Data, 17, 595–609, <a href="https://doi.org/10.5194/essd-17-595-2025" target="_blank">https://doi.org/10.5194/essd-17-595-2025</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib25"><label>Haskell(1953)</label><mixed-citation>
      
Haskell, N. A.: The Dispersion of Surface Waves on Multilayered Media, in:
Vincit Veritas: A Portrait of the Life and Work of Norman
Abraham Haskell, 1905–1970, edited by Ben-Menahem, A., vol. 43,
American Geophysical Union, Washington, D. C., 86–103, ISBN
978-0-87590-762-8, <a href="https://doi.org/10.1785/BSSA0430010017" target="_blank">https://doi.org/10.1785/BSSA0430010017</a>, 1953.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib26"><label>Herrmann(2013)</label><mixed-citation>
      
Herrmann, R. B.: Computer Programs in Seismology: An Evolving Tool for
Instruction and Research, Seismol. Res. Lett., 84, 1081–1088,
<a href="https://doi.org/10.1785/0220110096" target="_blank">https://doi.org/10.1785/0220110096</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib27"><label>Ho et al.(2020)Ho, Jain, and Abbeel</label><mixed-citation>
      
Ho, J., Jain, A., and Abbeel, P.: Denoising Diffusion Probabilistic Models, arXiv [preprint],
<a href="https://doi.org/10.48550/arXiv.2006.11239" target="_blank">https://doi.org/10.48550/arXiv.2006.11239</a>, 2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib28"><label>Hu et al.(2020)Hu, Qiu, Zhang, and Ben-Zion</label><mixed-citation>
      
Hu, J., Qiu, H., Zhang, H., and Ben-Zion, Y.: Using Deep Learning to
Derive Shear-Wave Velocity Models from Surface-Wave Dispersion Data,
Seismol. Res. Lett., 91, 1738–1751, <a href="https://doi.org/10.1785/0220190222" target="_blank">https://doi.org/10.1785/0220190222</a>,
2020.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib29"><label>Huang et al.(2024)Huang, Yu, Wang, and Wang</label><mixed-citation>
      
Huang, X., Yu, Z., Wang, W., and Wang, F.: JointNet: A Multimodal Deep
Learning-Based Approach for Joint Inversion of Rayleigh Wave
Dispersion and Ellipticity, B. Seismol. Soc. Am., 114, 627–641, <a href="https://doi.org/10.1785/0120230199" target="_blank">https://doi.org/10.1785/0120230199</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib30"><label>Jiang et al.(2025)Jiang, Ma, Ning, Li, Wu, and
Bao</label><mixed-citation>
      
Jiang, Y., Ma, J., Ning, J., Li, J., Wu, H., and Bao, T.: One-Fit-All
Transformer for Multimodal Geophysical Inversion: Method and
Application, Journal of Geophysical Research: Machine Learning and
Computation, 2, e2024JH000432, <a href="https://doi.org/10.1029/2024JH000432" target="_blank">https://doi.org/10.1029/2024JH000432</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib31"><label>Krischer et al.(2018)Krischer, Fichtner, Boehm, and
Igel</label><mixed-citation>
      
Krischer, L., Fichtner, A., Boehm, C., and Igel, H.: Automated Large-scale Full
Seismic Waveform Inversion for North America and the North Atlantic,
J. Geophys. Res.-Sol. Ea., 123, 5902–5928,
<a href="https://doi.org/10.1029/2017JB015289" target="_blank">https://doi.org/10.1029/2017JB015289</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib32"><label>Lehmann et al.(2024)Lehmann, Gatti, Bertin, and
Clouteau</label><mixed-citation>
      
Lehmann, F., Gatti, F., Bertin, M., and Clouteau, D.: Synthetic ground motions in heterogeneous geologies from various sources: the HEMEWS-3D database, Earth Syst. Sci. Data, 16, 3949–3972, <a href="https://doi.org/10.5194/essd-16-3949-2024" target="_blank">https://doi.org/10.5194/essd-16-3949-2024</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib33"><label>Lin et al.(2013)Lin, Li, Clayton, and
Hollis</label><mixed-citation>
      
Lin, F.-C., Li, D., Clayton, R. W., and Hollis, D.: High-Resolution 3D
Shallow Crustal Structure in Long Beach, California: Application of
Ambient Noise Tomography on a Dense Seismic Array, Geophysics, 78, Q45–Q56,
<a href="https://doi.org/10.1190/geo2012-0453.1" target="_blank">https://doi.org/10.1190/geo2012-0453.1</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib34"><label>Liu(2025a)</label><mixed-citation>
      
Liu, F.: OpenSWI-dataset, Zenodo [data set], <a href="https://doi.org/10.5281/zenodo.16874111" target="_blank">https://doi.org/10.5281/zenodo.16874111</a>, 2025a.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib35"><label>Liu(2025b)</label><mixed-citation>
      
Liu, F.: OpenSWI-toolbox, Zenodo [code], <a href="https://doi.org/10.5281/zenodo.16884901" target="_blank">https://doi.org/10.5281/zenodo.16884901</a>, 2025b.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib36"><label>Liu et al.(2024)Liu, Li, Fu, and Lu</label><mixed-citation>
      
Liu, F., Li, J., Fu, L., and Lu, L.: Multimodal Surface Wave Inversion with
Automatic Differentiation, Geophys. J. Int., 238, 290–312,
<a href="https://doi.org/10.1093/gji/ggae155" target="_blank">https://doi.org/10.1093/gji/ggae155</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib37"><label>Liu et al.(2025)Liu, Deng, Su, Bai, and Ouyang</label><mixed-citation>
      
Liu, F., Deng, B., Su, R., Bai, L., and Ouyang, W.: DispFormer: Pretrained
Transformer for Flexible Dispersion Curve Inversion from Global Synthesis to
Regional Applications, arXiv [preprint], <a href="https://doi.org/10.48550/ARXIV.2501.04366" target="_blank">https://doi.org/10.48550/ARXIV.2501.04366</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib38"><label>Lu et al.(2018)Lu, Stehly, Paul, and AlpArray Working
Group</label><mixed-citation>
      
Lu, Y., Stehly, L., Paul, A., and AlpArray Working Group: High-Resolution
Surface Wave Tomography of the European Crust and Uppermost Mantle from
Ambient Seismic Noise, Geophys. J. Int., 214, 1136–1150,
<a href="https://doi.org/10.1093/gji/ggy188" target="_blank">https://doi.org/10.1093/gji/ggy188</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib39"><label>Luo et al.(2022)Luo, Huang, Yang, Zhao, Yang, and
Xu</label><mixed-citation>
      
Luo, Y., Huang, Y., Yang, Y., Zhao, K., Yang, X., and Xu, H.: Constructing
Shear Velocity Models from Surface Wave Dispersion Curves Using Deep
Learning, J. Appl. Geophys., 196, 104524,
<a href="https://doi.org/10.1016/j.jappgeo.2021.104524" target="_blank">https://doi.org/10.1016/j.jappgeo.2021.104524</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib40"><label>Merrifield et al.(2022)Merrifield, Griffith, Zamanian, Gesbert, Sen,
De La Torre Guzman, Potter, and Kuehl</label><mixed-citation>
      
Merrifield, T. P., Griffith, D. P., Zamanian, S. A., Gesbert, S., Sen, S.,
De La Torre Guzman, J., Potter, R. D., and Kuehl, H.: Synthetic Seismic Data
for Training Deep Learning Networks, Interpretation, 10, SE31–SE39,
<a href="https://doi.org/10.1190/INT-2021-0193.1" target="_blank">https://doi.org/10.1190/INT-2021-0193.1</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib41"><label>Michelini et al.(2021)Michelini, Cianetti, Gaviano, Giunchi,
Jozinović, and Lauciani</label><mixed-citation>
      
Michelini, A., Cianetti, S., Gaviano, S., Giunchi, C., Jozinović, D., and Lauciani, V.: INSTANCE – the Italian seismic dataset for machine learning, Earth Syst. Sci. Data, 13, 5509–5544, <a href="https://doi.org/10.5194/essd-13-5509-2021" target="_blank">https://doi.org/10.5194/essd-13-5509-2021</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib42"><label>Mousavi et al.(2019)Mousavi, Sheng, Zhu, and
Beroza</label><mixed-citation>
      
Mousavi, S. M., Sheng, Y., Zhu, W., and Beroza, G. C.: STanford EArthquake
Dataset (STEAD): A Global Data Set of Seismic Signals for AI,
IEEE Access, 7, 179464–179476, <a href="https://doi.org/10.1109/ACCESS.2019.2947848" target="_blank">https://doi.org/10.1109/ACCESS.2019.2947848</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib43"><label>Park et al.(1999)Park, Miller, and Xia</label><mixed-citation>
      
Park, C. B., Miller, R. D., and Xia, J.: Multichannel Analysis of Surface
Waves, Geophysics, 64, 800–808, <a href="https://doi.org/10.1190/1.1444590" target="_blank">https://doi.org/10.1190/1.1444590</a>, 1999.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib44"><label>Pasyanos et al.(2014)Pasyanos, Masters, Laske, and
Ma</label><mixed-citation>
      
Pasyanos, M. E., Masters, T. G., Laske, G., and Ma, Z.: LITHO1.0: An
Updated Crust and Lithospheric Model of the Earth, J. Geophys.
Res.-Sol. Ea., 119, 2153–2173, <a href="https://doi.org/10.1002/2013JB010626" target="_blank">https://doi.org/10.1002/2013JB010626</a>, 2014.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib45"><label>Reid et al.(2025)Reid, Olivier, and Jones</label><mixed-citation>
      
Reid, A., Olivier, G., and Jones, T.: Ambient Noise Tomography: A
Sensitive, Rapid, Passive Seismic Technique for Mineral
Exploration, SEG Discovery, 17–26,
<a href="https://doi.org/10.5382/SEGnews.2025-140.fea-01" target="_blank">https://doi.org/10.5382/SEGnews.2025-140.fea-01</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib46"><label>Rickers et al.(2013)Rickers, Fichtner, and
Trampert</label><mixed-citation>
      
Rickers, F., Fichtner, A., and Trampert, J.: The Iceland–Jan Mayen Plume
System and Its Impact on Mantle Dynamics in the North Atlantic Region:
Evidence from Full-Waveform Inversion, Earth Planet. Sc. Lett.,
367, 39–51, <a href="https://doi.org/10.1016/j.epsl.2013.02.022" target="_blank">https://doi.org/10.1016/j.epsl.2013.02.022</a>, 2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib47"><label>Shapiro and Campillo(2004)</label><mixed-citation>
      
Shapiro, N. M. and Campillo, M.: Emergence of Broadband Rayleigh Waves from
Correlations of the Ambient Seismic Noise, Geophys. Res. Lett., 31,
2004GL019491, <a href="https://doi.org/10.1029/2004GL019491" target="_blank">https://doi.org/10.1029/2004GL019491</a>, 2004.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib48"><label>Shapiro and Ritzwoller(2002)</label><mixed-citation>
      
Shapiro, N. M. and Ritzwoller, M. H.: Monte-Carlo Inversion for a Global
Shear-Velocity Model of the Crust and Upper Mantle, Geophys. J. Int., 151, 88–105, <a href="https://doi.org/10.1046/j.1365-246X.2002.01742.x" target="_blank">https://doi.org/10.1046/j.1365-246X.2002.01742.x</a>, 2002.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib49"><label>Shen et al.(2013)Shen, Ritzwoller, and Schulte-Pelkum</label><mixed-citation>
      
Shen, W., Ritzwoller, M. H., and Schulte-Pelkum, V.: A 3-D Model of the
Crust and Uppermost Mantle beneath the Central and Western US by
Joint Inversion of Receiver Functions and Surface Wave Dispersion, J.
Geophys. Res.-Sol. Ea., 118, 262–276, <a href="https://doi.org/10.1029/2012JB009602" target="_blank">https://doi.org/10.1029/2012JB009602</a>,
2013.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib50"><label>Shen et al.(2016)Shen, Ritzwoller, Kang, Kim, Lin, Ning, Wang, Zheng,
and Zhou</label><mixed-citation>
      
Shen, W., Ritzwoller, M. H., Kang, D., Kim, Y., Lin, F.-C., Ning, J., Wang, W.,
Zheng, Y., and Zhou, L.: A Seismic Reference Model for the Crust and
Uppermost Mantle beneath China from Surface Wave Dispersion, Geophys. J. Int., 206, 954–979, <a href="https://doi.org/10.1093/gji/ggw175" target="_blank">https://doi.org/10.1093/gji/ggw175</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib51"><label>Simutė et al.(2016)Simutė, Steptoe, Cobden, Gokhberg, and Fichtner</label><mixed-citation>
      
Simutė, S., Steptoe, H., Cobden, L., Gokhberg, A., and Fichtner, A.: Full-waveform inversion of the Japanese islands region, J. Geophys. Res.-Sol. Ea., 121, 3722–3741, <a href="https://doi.org/10.1002/2016jb012802" target="_blank">https://doi.org/10.1002/2016jb012802</a>, 2016.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib52"><label>Socco and Strobbia(2004)</label><mixed-citation>
      
Socco, L. and Strobbia, C.: Surface-wave Method for Near-surface
Characterization: A Tutorial, Near Surf. Geophys., 2, 165–185,
<a href="https://doi.org/10.3997/1873-0604.2004015" target="_blank">https://doi.org/10.3997/1873-0604.2004015</a>, 2004.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib53"><label>Taufik et al.(2024)Taufik, Wang, and
Alkhalifah</label><mixed-citation>
      
Taufik, M. H., Wang, F., and Alkhalifah, T.: Learned Regularizations for
Multi-Parameter Elastic Full Waveform Inversion Using Diffusion
Models, Journal of Geophysical Research: Machine Learning and Computation,
1, e2024JH000125, <a href="https://doi.org/10.1029/2024JH000125" target="_blank">https://doi.org/10.1029/2024JH000125</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib54"><label>Thomson(1950)</label><mixed-citation>
      
Thomson, W. T.: Transmission of Elastic Waves through a Stratified Solid
Medium, J. Appl. Phys., 21, 89–93, <a href="https://doi.org/10.1063/1.1699629" target="_blank">https://doi.org/10.1063/1.1699629</a>,
1950.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib55"><label>Wang et al.(2022)Wang, Song, and Li</label><mixed-citation>
      
Wang, F., Song, X., and Li, J.: Deep Learning-Based <i>H</i>-<i>κ</i> Method (HkNet) for Estimating Crustal Thickness and <i>V</i><i>p</i>∕<i>V</i><i>s</i> Ratio From Receiver Functions, J. Geophys. Res.-Sol. Ea., 127,
e2022JB023944, <a href="https://doi.org/10.1029/2022JB023944" target="_blank">https://doi.org/10.1029/2022JB023944</a>, 2022.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib56"><label>Wang et al.(2023a)Wang, Huang, and
Alkhalifah</label><mixed-citation>
      
Wang, F., Huang, X., and Alkhalifah, T. A.: A Prior Regularized Full Waveform
Inversion Using Generative Diffusion Models, IEEE T. Geosci. Remote, 61, 1–11, <a href="https://doi.org/10.1109/tgrs.2023.3337014" target="_blank">https://doi.org/10.1109/tgrs.2023.3337014</a>,
2023a.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib57"><label>Wang et al.(2023b)Wang, Song, and
Li</label><mixed-citation>
      
Wang, F., Song, X., and Li, M.: A Deep-Learning-Based Approach for Seismic
Surface-Wave Dispersion Inversion (SfNet) with Application to the
Chinese Mainland, Earthquake Science, 36, 147–168,
<a href="https://doi.org/10.1016/j.eqs.2023.02.007" target="_blank">https://doi.org/10.1016/j.eqs.2023.02.007</a>, 2023b.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib58"><label>Wang et al.(2025)Wang, Wu, and Zhang</label><mixed-citation>
      
Wang, G., Wu, X., and Zhang, W.: cigChannel: a large-scale 3D seismic dataset with labeled paleochannels for advancing deep learning in seismic interpretation, Earth Syst. Sci. Data, 17, 3447–3471, <a href="https://doi.org/10.5194/essd-17-3447-2025" target="_blank">https://doi.org/10.5194/essd-17-3447-2025</a>, 2025.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib59"><label>Wathelet et al.(2004)Wathelet, Jongmans, and
Ohrnberger</label><mixed-citation>
      
Wathelet, M., Jongmans, D., and Ohrnberger, M.: Surface-wave Inversion Using a
Direct Search Algorithm and Its Application to Ambient Vibration
Measurements, Near Surf. Geophys., 2, 211–221,
<a href="https://doi.org/10.3997/1873-0604.2004018" target="_blank">https://doi.org/10.3997/1873-0604.2004018</a>, 2004.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib60"><label>Wen and Yu(2023)</label><mixed-citation>
      
Wen, L. and Yu, S.: The China Seismological Reference Model Project, Earth and
Planetary Physics, 7, 521–532, <a href="https://doi.org/10.26464/epp2023078" target="_blank">https://doi.org/10.26464/epp2023078</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib61"><label>Xia et al.(1999)Xia, Miller, and Park</label><mixed-citation>
      
Xia, J., Miller, R. D., and Park, C. B.: Estimation of Near-surface Shear-wave
Velocity by Inversion of Rayleigh Waves, Geophysics, 64, 691–700,
<a href="https://doi.org/10.1190/1.1444578" target="_blank">https://doi.org/10.1190/1.1444578</a>, 1999.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib62"><label>Xiao et al.(2024)Xiao, Cheng, Wu, Wang, Sun, Wang, Ma, Tong, Liang,
Tian, Li, Chen, Yu, and Wen</label><mixed-citation>
      
Xiao, X., Cheng, S., Wu, J., Wang, W., Sun, L., Wang, X., Ma, J., Tong, Y.,
Liang, X., Tian, X., Li, H., Chen, Q.-F., Yu, S., and Wen, L.: CSRM-1.0:
A China Seismological Reference Model, J. Geophys. Res.-Sol. Ea., 129, e2024JB029520, <a href="https://doi.org/10.1029/2024JB029520" target="_blank">https://doi.org/10.1029/2024JB029520</a>, 2024.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib63"><label>Xie et al.(2018)Xie, Chu, and Yang</label><mixed-citation>
      
Xie, J., Chu, R., and Yang, Y.: 3-D Upper-Mantle Shear Velocity Model
beneath the Contiguous United States Based on Broadband Surface Wave from
Ambient Seismic Noise, Pure Appl. Geophys., 175, 3403–3418,
<a href="https://doi.org/10.1007/s00024-018-1881-2" target="_blank">https://doi.org/10.1007/s00024-018-1881-2</a>, 2018.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib64"><label>Xin et al.(2019)Xin, Zhang, Kang, He, Gao, and
Gao</label><mixed-citation>
      
Xin, H., Zhang, H., Kang, M., He, R., Gao, L., and Gao, J.: High-resolution
Lithospheric Velocity Structure of Continental China by Double-difference
Seismic Travel-time Tomography, Seismol. Res. Lett., 90, 229–241,
<a href="https://doi.org/10.1785/0220180209" target="_blank">https://doi.org/10.1785/0220180209</a>, 2019.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib65"><label>Yablokov et al.(2023)Yablokov, Lugovtsova, and
Serdyukov</label><mixed-citation>
      
Yablokov, A., Lugovtsova, Y., and Serdyukov, A.: Uncertainty Quantification of
Multimodal Surface Wave Inversion Using Artificial Neural Networks,
Geophysics, 88, KS1–KS11, <a href="https://doi.org/10.1190/geo2022-0261.1" target="_blank">https://doi.org/10.1190/geo2022-0261.1</a>, 2023.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib66"><label>Yablokov et al.(2021)Yablokov, Serdyukov, Loginov, and
Baranov</label><mixed-citation>
      
Yablokov, A. V., Serdyukov, A. S., Loginov, G. N., and Baranov, V. D.: An
Artificial Neural Network Approach for the Inversion of Surface Wave
Dispersion Curves, Geophys. Prospect., 69, 1405–1432,
<a href="https://doi.org/10.1111/1365-2478.13107" target="_blank">https://doi.org/10.1111/1365-2478.13107</a>, 2021.

    </mixed-citation></ref-html>
<ref-html id="bib1.bib67"><label>Yang and Ritzwoller(2008)</label><mixed-citation>
      
Yang, Y. and Ritzwoller, M. H.: Characteristics of Ambient Seismic Noise as a
Source for Surface Wave Tomography, Geochem. Geophy. Geosy., 9,
2007GC001814, <a href="https://doi.org/10.1029/2007GC001814" target="_blank">https://doi.org/10.1029/2007GC001814</a>, 2008.

    </mixed-citation></ref-html>--></article>
