From 8978f0727359d7d3b61a51c8ff8fa0c08633e34f Mon Sep 17 00:00:00 2001 From: Mathew Biddle <8480023+MathewBiddle@users.noreply.github.com> Date: Thu, 11 Sep 2025 15:06:02 -0400 Subject: [PATCH 1/2] Starting reconcile notebook --- .../2025-09-11-reconcile_RA_NCEI.ipynb | 1475 +++++++++++++++++ 1 file changed, 1475 insertions(+) create mode 100644 jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb diff --git a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb new file mode 100644 index 00000000..0197e8dd --- /dev/null +++ b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb @@ -0,0 +1,1475 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "e58d3e48", + "metadata": {}, + "source": [ + "Do some matching between records at NCEI and records available through the IOOS data catalog. Essentially come up with a list of datasets that aren't at NCEI.\n", + "\n", + "Outline of process:\n", + "1. Build a dataframe of non-federal buoy datasets and metadata from the IOOS Catalog.\n", + "2. Use that dataframe to search NCEI for matching datasets affiliated with IOOS.\n", + "3. 
Identify which datasets are not at NCEI that should be.\n", + "\n", + "Borrow code from:\n", + "* https://ioos.github.io/ioos_code_lab/content/code_gallery/data_access_notebooks/2017-06-12-NCEI_RA_archive_history.html\n", + "* https://ioos.github.io/ioos_code_lab/content/code_gallery/data_access_notebooks/2024-09-17-CKAN_API_Query.html" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "66baa186", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Mathew.Biddle\\programs\\miniforge3\\envs\\IOOS\\Lib\\site-packages\\ckanapi\\version.py:1: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " import pkg_resources\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from ckanapi import RemoteCKAN\n", + "\n", + "ioos_catalog = RemoteCKAN(\n", + " address=\"https://data.ioos.us\",\n", + " user_agent=\"ckanapiioos/1.0 (+https://ioos.us/)\",\n", + ")\n", + "\n", + "\n", + "ioos_catalog" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "3bea33bf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['aoos', 'caricoos', 'cdip', 'cencoos', 'comt', 'gcoos', 'glider-dac', 'glos', 'hf-radar-dac', 'ioos', 'maracoos', 'nanoos', 'neracoos', 'noaa-co-ops', 'noaa-ndbc', 'oceansites', 'pacioos', 'sccoos', 'secoora', 'unidata', 'usgs', 'us-navy']\n" + ] + } + ], + "source": [ + "orgs = ioos_catalog.action.organization_list()\n", + "print(orgs)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c460b610", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "44142" + ] + }, + "execution_count": 3, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "datasets = ioos_catalog.action.package_search()\n", + "datasets[\"count\"]" + ] + }, + { + "cell_type": "markdown", + "id": "a7f29d98", + "metadata": {}, + "source": [ + "## 2 options\n", + "\n", + "1. go accession by accession and extract the following info:\n", + "\n", + "Let's do some testing with the following NCEI accession:\n", + "https://www.ncei.noaa.gov/data/oceans/ncei/archive/metadata/approved/granule/0171311.xml\n", + "\n", + "```xml\n", + "\n", + "\n", + "\n", + "Indian Island station\n", + "```\n", + "\n", + "```xml\n", + "\n", + "\n", + "\n", + "Central and Northern California Ocean Observing System\n", + "```\n", + "\n", + "2. Use the collection level records to get what we need.\n", + "\n", + "For example, CeNCOOS:\n", + "https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-CeNCOOS;view=xml;responseType=text/xml\n", + "\n", + "```xml\n", + "\n", + "\n", + "\n", + "Bodega Marine Laboratory seawater intake, Horseshoe Cove station,\n", + "\n", + "\n", + "Cal Poly Pier San Luis Obispo station\n", + "\n", + "\n", + "California Maritime pier Carquinez shore station\n", + "\n", + "\n", + "Fort Point Pier station\n", + "\n", + "\n", + "Hog Island Oyster Company Burkolator, Tomales Bay,\n", + "\n", + "\n", + "Humboldt Bay Pier station\n", + "\n", + "\n", + "Humboldt Dock B Shore Station\n", + "\n", + "\n", + "Indian Island station\n", + "\n", + "\n", + "Monterey Bay Commercial Wharf station\n", + "\n", + "\n", + "Morro Bay (BS1) station\n", + "\n", + "\n", + "Morro Bay station\n", + "\n", + "\n", + "Moss Landing Marine Laboratory Seawater Intake Monitoring Station\n", + "\n", + "\n", + "Romberg Tiburon Center Pier station\n", + "\n", + "\n", + "Santa Cruz municipal wharf station\n", + "\n", + "\n", + "Trinidad Head station\n", + "\n", + "\n", + "platform\n", + "\n", + "\n", + "\n", + "\n", + "Provider Platform Names\n", + "\n", + "\n", + "\n", + "\n", + "\n", +
"\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "b7de9f4b", + "metadata": {}, + "source": [ + "## Grab info from NCEI" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c9a2649", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "f0af3d01", + "metadata": {}, + "source": [ + "## Do the searching\n" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "d7757948", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "num_results: 1, result_count: 0\n", + "num_results: 1, result_count: 1\n", + "num_results: 1, result_count: 1, total_result_count: 1\n" + ] + } + ], + "source": [
+    "import time\n",
+    "import pandas as pd\n",
+    "import json\n",
+    "\n",
+    "from ckanapi import RemoteCKAN\n",
+    "from ckanapi.errors import CKANAPIError\n",
+    "from requests.exceptions import ChunkedEncodingError\n",
+    "from urllib3.exceptions import IncompleteRead\n",
+    "\n",
+    "ua = \"ckanapiioos/1.0 (+https://ioos.us/)\"\n",
+    "\n",
+    "ioos_catalog = RemoteCKAN(\"https://data.ioos.us\", user_agent=ua)\n",
+    "\n",
+    "df_ioos_catalog = pd.DataFrame()\n",
+    "max_retries = 5  # consecutive failed requests tolerated per page\n",
+    "\n",
+    "platforms = [\"Elliott Point\"]\n",
+    "orgs = [\"NANOOS\"]\n",
+    "\n",
+    "for org in orgs:\n",
+    "    org_ncei = org.lower()\n",
+    "\n",
+    "    for platform in platforms:\n",
+    "        platform_ncei = platform\n",
+    "        filter_query = f\"organization:{org_ncei.lower()}\"\n",
+    "        free_text_query = f\"{platform_ncei.lower()}\"\n",
+    "\n",
+    "        # Start every platform search from a clean slate. Carrying the\n",
+    "        # previous platform's rows and offset over would begin the next\n",
+    "        # search at the wrong offset and append the earlier rows again.\n",
+    "        plat_frames = []\n",
+    "        result_count = 0\n",
+    "        failures = 0\n",
+    "\n",
+    "        while True:\n",
+    "            try:\n",
+    "                datasets = ioos_catalog.action.package_search(\n",
+    "                    fq=filter_query,\n",
+    "                    q=free_text_query,\n",
+    "                    rows=500,\n",
+    "                    start=result_count,\n",
+    "                )\n",
+    "            except (CKANAPIError, IncompleteRead, ChunkedEncodingError):\n",
+    "                # Wait and retry, but give up if the catalog keeps failing.\n",
+    "                failures += 1\n",
+    "                if failures > max_retries:\n",
+    "                    raise\n",
+    "                time.sleep(2 * failures)\n",
+    "                continue\n",
+    "\n",
+    "            failures = 0\n",
+    "            num_results = datasets[\"count\"]\n",
+    "            print(f\"num_results: {num_results}, result_count: {result_count}\")\n",
+    "\n",
+    "            for dataset in datasets[\"results\"]:\n",
+    "                # maybe just read all metadata into a DataFrame.\n",
+    "                df = pd.DataFrame.from_dict(dataset, orient='index').T\n",
+    "\n",
+    "                # for entry in dataset['extras']:\n",
+    "                #     if entry['key'] == 'temporal-extent-begin':\n",
+    "                #         start_date = entry['value']\n",
+    "                #     elif entry['key'] == 'temporal-extent-end':\n",
+    "                #         end_date= entry['value']\n",
+    "                #     elif entry['key'] == 'aggregation-info':\n",
+    "                #         my_list = json.loads(entry['value'])\n",
+    "                #         my_dict = {i: my_list[i] for i in range(len(my_list))}\n",
+    "                #         for agg in my_dict.keys():\n",
+    "                #             if my_dict[agg]['aggregate-dataset-identifier'] != \"\":\n",
+    "                #                 dtype = my_dict[agg]['aggregate-dataset-identifier']\n",
+    "\n",
+    "                # df = pd.DataFrame(\n",
+    "                #     {\n",
+    "                #         \"title\": [dataset[\"title\"]],\n",
+    "                #         \"url\": [dataset[\"resources\"][0][\"url\"]],\n",
+    "                #         \"org\": [dataset[\"organization\"][\"title\"]],\n",
+    "                #         \"platform\": platform_ncei,\n",
+    "                #         'start_date':start_date,\n",
+    "                #         'end_date':end_date,\n",
+    "                #         'datatype': dtype,\n",
+    "\n",
+    "                #     }\n",
+    "                # )\n",
+    "\n",
+    "                plat_frames.append(df)\n",
+    "\n",
+    "            result_count = len(plat_frames)\n",
+    "\n",
+    "            # An empty page also ends the search (the offset could never advance).\n",
+    "            if result_count >= num_results or not datasets[\"results\"]:\n",
+    "                print(f\"num_results: {num_results}, result_count: {result_count}\")\n",
+    "                break\n",
+    "\n",
+    "        # Build the platform frame once instead of growing it row by row.\n",
+    "        df_plat = pd.concat(plat_frames, ignore_index=True) if plat_frames else pd.DataFrame()\n",
+    "        df_ioos_catalog = pd.concat([df_ioos_catalog, df_plat], ignore_index=True)\n",
+    "\n",
+    "        print(\n",
+    "            f\"num_results: {num_results}, result_count: {result_count}, total_result_count: {df_ioos_catalog.shape[0]}\"\n",
+    "        )"
+ ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "67f79caa", + "metadata": {}, + "outputs": [ + { +
"data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
authorauthor_emailcreator_user_ididisopenlicense_idlicense_titlemaintainermaintainer_emailmetadata_created...titletypeurlversionextrasresourcestagsgroupsrelationships_as_subjectrelationships_as_object
0NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75856b00a5-4c7d-4f8c-8244-e77fb85e793eFalseNoneNoneNoneNone2025-01-09T13:27:21.796623...(CMOP) Elliott PointdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
\n", + "

1 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " author author_email creator_user_id \\\n", + "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "\n", + " id isopen license_id license_title \\\n", + "0 856b00a5-4c7d-4f8c-8244-e77fb85e793e False None None \n", + "\n", + " maintainer maintainer_email metadata_created ... \\\n", + "0 None None 2025-01-09T13:27:21.796623 ... \n", + "\n", + " title type url version \\\n", + "0 (CMOP) Elliott Point dataset None None \n", + "\n", + " extras \\\n", + "0 [{'key': 'access-constraints', 'value': '[]'},... \n", + "\n", + " resources \\\n", + "0 [{'cache_last_updated': None, 'cache_url': Non... \n", + "\n", + " tags groups \\\n", + "0 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "\n", + " relationships_as_subject relationships_as_object \n", + "0 [] [] \n", + "\n", + "[1 rows x 29 columns]" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ioos_catalog" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "b9b2a4d8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
authorauthor_emailcreator_user_ididisopenlicense_idlicense_titlemaintainermaintainer_emailmetadata_created...titletypeurlversionextrasresourcestagsgroupsrelationships_as_subjectrelationships_as_object
0NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75856b00a5-4c7d-4f8c-8244-e77fb85e793eFalseNoneNoneNoneNone2025-01-09T13:27:21.796623...(CMOP) Elliott PointdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
\n", + "

1 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " author author_email creator_user_id \\\n", + "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "\n", + " id isopen license_id license_title \\\n", + "0 856b00a5-4c7d-4f8c-8244-e77fb85e793e False None None \n", + "\n", + " maintainer maintainer_email metadata_created ... \\\n", + "0 None None 2025-01-09T13:27:21.796623 ... \n", + "\n", + " title type url version \\\n", + "0 (CMOP) Elliott Point dataset None None \n", + "\n", + " extras \\\n", + "0 [{'key': 'access-constraints', 'value': '[]'},... \n", + "\n", + " resources \\\n", + "0 [{'cache_last_updated': None, 'cache_url': Non... \n", + "\n", + " tags groups \\\n", + "0 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "\n", + " relationships_as_subject relationships_as_object \n", + "0 [] [] \n", + "\n", + "[1 rows x 29 columns]" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_dict(dataset, orient='index').T" + ] + }, + { + "cell_type": "markdown", + "id": "b77305da", + "metadata": {}, + "source": [ + "## Query IOOS Catalog for appropriate datasets\n", + "\n", + "Gather all the datasets associated with an RA and filter to just buoys and similar platforms." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "12318daf", + "metadata": {}, + "outputs": [], + "source": [
+    "def ioos_ckan_query(ioos_catalog, filter_query, free_text_query, max_retries=5, retry_wait=2.0):\n",
+    "    '''\n",
+    "    Query the IOOS catalog and return every matching dataset as one DataFrame.\n",
+    "\n",
+    "    Results are requested in pages of 500 rows until the number of rows\n",
+    "    collected reaches the total count reported by the catalog.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    ioos_catalog : RemoteCKAN object\n",
+    "        The RemoteCKAN object to use for querying the IOOS catalog.\n",
+    "    filter_query : str\n",
+    "        The filter query, passed to package_search as ``fq``.\n",
+    "    free_text_query : str\n",
+    "        The free text query, passed to package_search as ``q``.\n",
+    "    max_retries : int, optional\n",
+    "        Consecutive failed requests tolerated for one page before giving up.\n",
+    "    retry_wait : float, optional\n",
+    "        Seconds to wait before a retry, multiplied by the failure count.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    df_plat : pandas.DataFrame\n",
+    "        One row per dataset and one column per top-level metadata field.\n",
+    "        Nested fields such as ``extras`` and ``resources`` are kept as\n",
+    "        returned by the catalog so they can be unpacked in a later step;\n",
+    "        the frame is empty when nothing matches.\n",
+    "\n",
+    "    Raises\n",
+    "    ------\n",
+    "    CKANAPIError, IncompleteRead, ChunkedEncodingError\n",
+    "        Re-raised when a page still fails after ``max_retries`` retries.\n",
+    "    '''\n",
+    "\n",
+    "    frames = []\n",
+    "    result_count = 0\n",
+    "    num_results = 0\n",
+    "    failures = 0\n",
+    "\n",
+    "    while True:\n",
+    "        try:\n",
+    "            datasets = ioos_catalog.action.package_search(\n",
+    "                fq=filter_query,\n",
+    "                q=free_text_query,\n",
+    "                rows=500,\n",
+    "                start=result_count,\n",
+    "            )\n",
+    "        except (CKANAPIError, IncompleteRead, ChunkedEncodingError):\n",
+    "            # Possibly transient catalog/network error: wait, then retry the\n",
+    "            # same page, but never spin forever if the service keeps failing.\n",
+    "            failures += 1\n",
+    "            if failures > max_retries:\n",
+    "                raise\n",
+    "            time.sleep(retry_wait * failures)\n",
+    "            continue\n",
+    "\n",
+    "        failures = 0\n",
+    "        num_results = datasets[\"count\"]\n",
+    "\n",
+    "        print(f\"num_results: {num_results}, result_count: {result_count}\")\n",
+    "\n",
+    "        page = datasets[\"results\"]\n",
+    "        frames.extend(\n",
+    "            pd.DataFrame.from_dict(dataset, orient='index').T for dataset in page\n",
+    "        )\n",
+    "        result_count += len(page)\n",
+    "\n",
+    "        # Stop once everything is collected, or on an empty page (otherwise\n",
+    "        # the same offset would be requested forever).\n",
+    "        if result_count >= num_results or not page:\n",
+    "            print(f\"num_results: {num_results}, result_count: {result_count}\")\n",
+    "            break\n",
+    "\n",
+    "    # Concatenate once; growing a DataFrame inside the loop is quadratic.\n",
+    "    df_plat = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()\n",
+    "\n",
+    "    print(\n",
+    "        f\"num_results: {num_results}, result_count: {result_count}, total_result_count: {df_plat.shape[0]}\"\n",
+    "    )\n",
+    "\n",
+    "    return df_plat"
+ ] + }, + { + "cell_type": "markdown", + "id": "23812956", + "metadata": {}, + "source": [ + "## Actually do the querying" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "80c3323e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "num_results: 169, result_count: 0\n", + "num_results: 169, result_count: 169\n", + "num_results: 169, result_count: 169, total_result_count: 169\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
authorauthor_emailcreator_user_ididisopenlicense_idlicense_titlemaintainermaintainer_emailmetadata_created...titletypeurlversionextrasresourcestagsgroupsrelationships_as_subjectrelationships_as_object
0NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd7568a4c18a-ec0f-4c2d-9479-b96af1661f9cFalseNoneNoneNoneNone2025-05-09T16:04:20.047386...Glider - Trinidad Head Line: 2019 September - ...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'AUVS > Autonomous Underwate...[][][]
1NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75b369815e-03fc-4980-836f-d9b98b53ec0bFalseNoneNoneNoneNone2025-05-09T16:03:23.528924...Glider - Trinidad Head Line: 2015 September - ...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'AUVS > Autonomous Underwate...[][][]
2NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd757da90e03-f8aa-483e-96d0-7a27051b90b4FalseNoneNoneNoneNone2025-04-11T14:32:17.541505...Backyard Buoys - NANOOS - Washington: Quileute...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Backyard Buoys', 'id': 'f80...[][][]
3NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75703a2dae-6784-4317-9463-dfd2cdfa4d6cFalseNoneNoneNoneNone2025-05-09T16:03:49.832982...Glider - La Push Line: 2025 March - OngoingdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'AUVS > Autonomous Underwate...[][][]
4NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75d0ef6a3a-4894-43f3-b4ea-2a882dccc478FalseNoneNoneNoneNone2025-01-09T02:08:28.700159...NPBY1 - Point Wells: Meteorological Station DatadatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Earth Science > Atmosphere ...[][][]
..................................................................
164NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75bff06122-cf40-4611-b5b3-c8c79a71cfacFalseNoneNoneNoneNone2025-01-09T13:26:01.855237...(APL-UW) Ćháʔba· UW/NANOOS Moore...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
165NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd752449dd5c-57c5-43dd-a3d6-f52de352a0e5FalseNoneNoneNoneNone2025-01-09T13:25:59.040273...(WADOH) Hood Canal 1 site, W shore of Hood Can...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
166NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd7546917a4a-9e77-495b-a0d3-3c5cea2bc5e8FalseNoneNoneNoneNone2025-01-09T13:25:56.552833...(CMOP) Grays Point (USCG day mark green 13)datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
167NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd753261508c-5b1d-42a8-95ae-fe142449a216FalseNoneNoneNoneNone2025-01-09T13:25:53.600691...(WADOH) Skookum Inlet site, N shore near Deer ...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
168NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd7592c7abed-b1f8-4827-94f5-888d0aa7858eFalseNoneNoneNoneNone2025-01-09T13:25:50.892710...(WADOH) Eld Inlet site, W shore near Frye Cove...datasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
\n", + "

169 rows × 29 columns

\n", + "
" + ], + "text/plain": [ + " author author_email creator_user_id \\\n", + "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "1 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "2 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "3 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "4 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + ".. ... ... ... \n", + "164 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "165 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "166 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "167 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "168 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "\n", + " id isopen license_id license_title \\\n", + "0 68a4c18a-ec0f-4c2d-9479-b96af1661f9c False None None \n", + "1 b369815e-03fc-4980-836f-d9b98b53ec0b False None None \n", + "2 7da90e03-f8aa-483e-96d0-7a27051b90b4 False None None \n", + "3 703a2dae-6784-4317-9463-dfd2cdfa4d6c False None None \n", + "4 d0ef6a3a-4894-43f3-b4ea-2a882dccc478 False None None \n", + ".. ... ... ... ... \n", + "164 bff06122-cf40-4611-b5b3-c8c79a71cfac False None None \n", + "165 2449dd5c-57c5-43dd-a3d6-f52de352a0e5 False None None \n", + "166 46917a4a-9e77-495b-a0d3-3c5cea2bc5e8 False None None \n", + "167 3261508c-5b1d-42a8-95ae-fe142449a216 False None None \n", + "168 92c7abed-b1f8-4827-94f5-888d0aa7858e False None None \n", + "\n", + " maintainer maintainer_email metadata_created ... \\\n", + "0 None None 2025-05-09T16:04:20.047386 ... \n", + "1 None None 2025-05-09T16:03:23.528924 ... \n", + "2 None None 2025-04-11T14:32:17.541505 ... \n", + "3 None None 2025-05-09T16:03:49.832982 ... \n", + "4 None None 2025-01-09T02:08:28.700159 ... \n", + ".. ... ... ... ... \n", + "164 None None 2025-01-09T13:26:01.855237 ... \n", + "165 None None 2025-01-09T13:25:59.040273 ... \n", + "166 None None 2025-01-09T13:25:56.552833 ... \n", + "167 None None 2025-01-09T13:25:53.600691 ... 
\n", + "168 None None 2025-01-09T13:25:50.892710 ... \n", + "\n", + " title type url version \\\n", + "0 Glider - Trinidad Head Line: 2019 September - ... dataset None None \n", + "1 Glider - Trinidad Head Line: 2015 September - ... dataset None None \n", + "2 Backyard Buoys - NANOOS - Washington: Quileute... dataset None None \n", + "3 Glider - La Push Line: 2025 March - Ongoing dataset None None \n", + "4 NPBY1 - Point Wells: Meteorological Station Data dataset None None \n", + ".. ... ... ... ... \n", + "164 (APL-UW) Ćháʔba· UW/NANOOS Moore... dataset None None \n", + "165 (WADOH) Hood Canal 1 site, W shore of Hood Can... dataset None None \n", + "166 (CMOP) Grays Point (USCG day mark green 13) dataset None None \n", + "167 (WADOH) Skookum Inlet site, N shore near Deer ... dataset None None \n", + "168 (WADOH) Eld Inlet site, W shore near Frye Cove... dataset None None \n", + "\n", + " extras \\\n", + "0 [{'key': 'access-constraints', 'value': '[]'},... \n", + "1 [{'key': 'access-constraints', 'value': '[]'},... \n", + "2 [{'key': 'access-constraints', 'value': '[]'},... \n", + "3 [{'key': 'access-constraints', 'value': '[]'},... \n", + "4 [{'key': 'access-constraints', 'value': '[]'},... \n", + ".. ... \n", + "164 [{'key': 'access-constraints', 'value': '[]'},... \n", + "165 [{'key': 'access-constraints', 'value': '[]'},... \n", + "166 [{'key': 'access-constraints', 'value': '[]'},... \n", + "167 [{'key': 'access-constraints', 'value': '[]'},... \n", + "168 [{'key': 'access-constraints', 'value': '[]'},... \n", + "\n", + " resources \\\n", + "0 [{'cache_last_updated': None, 'cache_url': Non... \n", + "1 [{'cache_last_updated': None, 'cache_url': Non... \n", + "2 [{'cache_last_updated': None, 'cache_url': Non... \n", + "3 [{'cache_last_updated': None, 'cache_url': Non... \n", + "4 [{'cache_last_updated': None, 'cache_url': Non... \n", + ".. ... \n", + "164 [{'cache_last_updated': None, 'cache_url': Non... 
\n", + "165 [{'cache_last_updated': None, 'cache_url': Non... \n", + "166 [{'cache_last_updated': None, 'cache_url': Non... \n", + "167 [{'cache_last_updated': None, 'cache_url': Non... \n", + "168 [{'cache_last_updated': None, 'cache_url': Non... \n", + "\n", + " tags groups \\\n", + "0 [{'display_name': 'AUVS > Autonomous Underwate... [] \n", + "1 [{'display_name': 'AUVS > Autonomous Underwate... [] \n", + "2 [{'display_name': 'Backyard Buoys', 'id': 'f80... [] \n", + "3 [{'display_name': 'AUVS > Autonomous Underwate... [] \n", + "4 [{'display_name': 'Earth Science > Atmosphere ... [] \n", + ".. ... ... \n", + "164 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "165 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "166 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "167 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "168 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "\n", + " relationships_as_subject relationships_as_object \n", + "0 [] [] \n", + "1 [] [] \n", + "2 [] [] \n", + "3 [] [] \n", + "4 [] [] \n", + ".. ... ... 
\n", + "164 [] [] \n", + "165 [] [] \n", + "166 [] [] \n", + "167 [] [] \n", + "168 [] [] \n", + "\n", + "[169 rows x 29 columns]" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [
+    "import time\n",
+    "import pandas as pd\n",
+    "\n",
+    "from ckanapi import RemoteCKAN\n",
+    "from ckanapi.errors import CKANAPIError\n",
+    "from requests.exceptions import ChunkedEncodingError\n",
+    "from urllib3.exceptions import IncompleteRead\n",
+    "\n",
+    "ua = \"ckanapiioos/1.0 (+https://ioos.us/)\"\n",
+    "\n",
+    "ioos_catalog = RemoteCKAN(\"https://data.ioos.us\", user_agent=ua)\n",
+    "\n",
+    "platforms = [\"Elliott Point\"]  # not used by the organization-wide query below\n",
+    "orgs = [\"NANOOS\"]\n",
+    "\n",
+    "# One catalog query per organization. The free text query is left empty so\n",
+    "# the search is limited only by the organization filter; looping over\n",
+    "# platforms here would repeat the same query and duplicate its rows.\n",
+    "org_frames = []\n",
+    "for org in orgs:\n",
+    "    org_ncei = org.lower()\n",
+    "    filter_query = f\"organization:{org_ncei}\"\n",
+    "    free_text_query = \"\"\n",
+    "    org_frames.append(ioos_ckan_query(ioos_catalog, filter_query, free_text_query))\n",
+    "\n",
+    "# Combine once instead of growing the DataFrame inside the loop.\n",
+    "df_ioos_catalog = pd.concat(org_frames, ignore_index=True) if org_frames else pd.DataFrame()\n",
+    "\n",
+    "df_ioos_catalog"
+ ] + }, + { + "cell_type": "markdown", + "id": "7197502f", + "metadata": {}, + "source": [ + "## Start filtering down to buoys and similar platforms" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "a19d1160", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleorgstart_dateend_datedatatype
2Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2024-05-02T04:40:00Z2025-08-29T20:30:00ZTimeSeries
4NPBY1 - Point Wells: Meteorological Station DataNANOOS2014-09-30T23:42:13Z2025-09-05T14:50:39ZTimeSeries
6NPBY2 - Carr Inlet: Meteorological Station DataNANOOS2014-12-17T04:26:26Z2025-09-05T15:29:20ZTimeSeries
23NEMO - ChaBa Meteorlogical - Gill Metpak ProNANOOS2017-05-01T06:10:38Z2025-09-03T12:55:36ZTimeSeries
32Se'lhaem, Bellingham Bay Meteorological Statio...NANOOS2016-02-14T20:14:02Z2025-07-05T14:20:02ZTimeSeries
34ORCA3 - Hansville: Meteorological Station DataNANOOS2015-04-01T19:04:49Z2024-12-16T10:09:46ZTimeSeries
41ORCA1 - Twanoh: Meteorological Station DataNANOOS2019-09-01T00:00:31Z2025-09-05T15:25:47ZTimeSeries
50ORCA4 - Dabob Bay: Meteorological Station DataNANOOS2019-02-20T20:22:04Z2025-06-04T22:09:56ZTimeSeries
51Se'lhaem, Bellingham Bay Surface Hydrological ...NANOOS2016-02-14T20:20:02Z2025-07-05T14:20:02ZTimeSeries
54Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2023-10-19T20:20:00Z2023-12-24T22:50:00ZTimeSeries
55NEMO - Cha'Ba, CTD Data, Near-realtime (incomp...NANOOS2025-04-23T19:20:45Z2025-09-03T12:40:12ZTimeSeries
69Se'lhaem, Bellingham Bay Deep Hydrological Sta...NANOOS2007-01-01T00:10:02Z2024-11-20T02:40:02ZTimeSeries
72NEMO - Chaba Wind - Vaisala WMT 700 AnemometerNANOOS2016-10-26T20:00:32Z2025-09-03T12:46:25ZTimeSeries
76ORCA2 - Hoodsport: Meteorological Station DataNANOOS2023-01-13T20:51:23Z2025-09-05T15:26:57ZTimeSeries
85Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2025-04-23T21:30:00Z2025-08-29T22:30:00ZTimeSeries
89Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2024-08-30T20:22:01Z2025-08-29T20:51:29ZTimeSeries
\n", + "
" + ], + "text/plain": [ + " title org \\\n", + "2 Backyard Buoys - NANOOS - Washington: Quileute... NANOOS \n", + "4 NPBY1 - Point Wells: Meteorological Station Data NANOOS \n", + "6 NPBY2 - Carr Inlet: Meteorological Station Data NANOOS \n", + "23 NEMO - ChaBa Meteorlogical - Gill Metpak Pro NANOOS \n", + "32 Se'lhaem, Bellingham Bay Meteorological Statio... NANOOS \n", + "34 ORCA3 - Hansville: Meteorological Station Data NANOOS \n", + "41 ORCA1 - Twanoh: Meteorological Station Data NANOOS \n", + "50 ORCA4 - Dabob Bay: Meteorological Station Data NANOOS \n", + "51 Se'lhaem, Bellingham Bay Surface Hydrological ... NANOOS \n", + "54 Backyard Buoys - NANOOS - Washington: Quileute... NANOOS \n", + "55 NEMO - Cha'Ba, CTD Data, Near-realtime (incomp... NANOOS \n", + "69 Se'lhaem, Bellingham Bay Deep Hydrological Sta... NANOOS \n", + "72 NEMO - Chaba Wind - Vaisala WMT 700 Anemometer NANOOS \n", + "76 ORCA2 - Hoodsport: Meteorological Station Data NANOOS \n", + "85 Backyard Buoys - NANOOS - Washington: Quileute... NANOOS \n", + "89 Backyard Buoys - NANOOS - Washington: Quileute... 
NANOOS \n", + "\n", + " start_date end_date datatype \n", + "2 2024-05-02T04:40:00Z 2025-08-29T20:30:00Z TimeSeries \n", + "4 2014-09-30T23:42:13Z 2025-09-05T14:50:39Z TimeSeries \n", + "6 2014-12-17T04:26:26Z 2025-09-05T15:29:20Z TimeSeries \n", + "23 2017-05-01T06:10:38Z 2025-09-03T12:55:36Z TimeSeries \n", + "32 2016-02-14T20:14:02Z 2025-07-05T14:20:02Z TimeSeries \n", + "34 2015-04-01T19:04:49Z 2024-12-16T10:09:46Z TimeSeries \n", + "41 2019-09-01T00:00:31Z 2025-09-05T15:25:47Z TimeSeries \n", + "50 2019-02-20T20:22:04Z 2025-06-04T22:09:56Z TimeSeries \n", + "51 2016-02-14T20:20:02Z 2025-07-05T14:20:02Z TimeSeries \n", + "54 2023-10-19T20:20:00Z 2023-12-24T22:50:00Z TimeSeries \n", + "55 2025-04-23T19:20:45Z 2025-09-03T12:40:12Z TimeSeries \n", + "69 2007-01-01T00:10:02Z 2024-11-20T02:40:02Z TimeSeries \n", + "72 2016-10-26T20:00:32Z 2025-09-03T12:46:25Z TimeSeries \n", + "76 2023-01-13T20:51:23Z 2025-09-05T15:26:57Z TimeSeries \n", + "85 2025-04-23T21:30:00Z 2025-08-29T22:30:00Z TimeSeries \n", + "89 2024-08-30T20:22:01Z 2025-08-29T20:51:29Z TimeSeries " + ] + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ioos_catalog.loc[df_ioos_catalog['datatype']=='TimeSeries']" + ] + }, + { + "cell_type": "markdown", + "id": "3b6d6d90", + "metadata": {}, + "source": [ + "Note that CMOP Elliott Point does not appear in the timeseries filtered results." + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "b5d04cb2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titleorgstart_dateend_datedatatype
128(CMOP) Elliott PointNANOOS2018-01-02T08:33:43+00:002019-02-10T21:27:42+00:00None
\n", + "
" + ], + "text/plain": [ + " title org start_date \\\n", + "128 (CMOP) Elliott Point NANOOS 2018-01-02T08:33:43+00:00 \n", + "\n", + " end_date datatype \n", + "128 2019-02-10T21:27:42+00:00 None " + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_ioos_catalog.loc[df_ioos_catalog['title'].str.contains('Elliott Point')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8607197e", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "IOOS", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 7576bd0a15162b3e66e264583e8721ab850655af Mon Sep 17 00:00:00 2001 From: "Mathew.Biddle" Date: Fri, 12 Sep 2025 13:34:01 -0400 Subject: [PATCH 2/2] tinkering --- .../2025-09-11-reconcile_RA_NCEI.ipynb | 2345 ++++++++++------- 1 file changed, 1448 insertions(+), 897 deletions(-) diff --git a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb index 0197e8dd..ce40ab27 100644 --- a/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb +++ b/jupyterbook/content/code_gallery/data_analysis_and_visualization_notebooks/2025-09-11-reconcile_RA_NCEI.ipynb @@ -34,7 +34,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 1, @@ -82,7 +82,7 @@ { "data": { "text/plain": [ - "44142" + "44147" ] }, "execution_count": 3, @@ -97,492 +97,37 @@ }, { "cell_type": "markdown", - "id": "a7f29d98", + "id": "b77305da", 
"metadata": {}, "source": [ - "## 2 options\n", - "\n", - "1. go accession by accession and extract the following info:\n", - "\n", - "Let's do some testing with the following NCEI accession:\n", - "https://www.ncei.noaa.gov/data/oceans/ncei/archive/metadata/approved/granule/0171311.xml\n", - "\n", - "```xml\n", - "\n", - "\n", - "\n", - "Indian Island station\n", - "```\n", - "\n", - "```xml\n", - "\n", - "\n", - "\n", - "Central and Northern California Ocean Observing System\n", - "```\n", - "\n", - "2. Use the collection level records to get what we need.\n", - "\n", - "For example, CeNCOOS:\n", - "https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-CeNCOOS;view=xml;responseType=text/xml\n", + "## Query IOOS Catalog for appropriate datasets\n", "\n", - "```xml\n", - "\n", - "\n", - "\n", - "Bodega Marine Laboratory seawater intake, Horseshoe Cove station,\n", - "\n", - "\n", - "Cal Poly Pier San Luis Obispo station\n", - "\n", - "\n", - "California Maritime pier Carquinez shore station\n", - "\n", - "\n", - "Fort Point Pier station\n", - "\n", - "\n", - "Hog Island Oyster Company Burkolator, Tomales Bay,\n", - "\n", - "\n", - "Humboldt Bay Pier station\n", - "\n", - "\n", - "Humboldt Dock B Shore Station\n", - "\n", - "\n", - "Indian Island station\n", - "\n", - "\n", - "Monterey Bay Commercial Wharf station\n", - "\n", - "\n", - "Morro Bay (BS1) station\n", - "\n", - "\n", - "Morro Bay station\n", - "\n", - "\n", - "Moss Landing Marine Laboratory Seawater Intake Monitoring Station\n", - "\n", - "\n", - "Romberg Tiburon Center Pier station\n", - "\n", - "\n", - "Santa Cruz municipal wharf station\n", - "\n", - "\n", - "Trinidad Head station\n", - "\n", - "\n", - "platform\n", - "\n", - "\n", - "\n", - "\n", - "Provider Platform Names\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "```" + "Gather all the datasets associated with an RA and filter to just buoys and similar platforms." 
] }, { "cell_type": "markdown", - "id": "b7de9f4b", + "id": "23812956", "metadata": {}, "source": [ - "## Grab info from NCEI" + "## Actually do the querying" ] }, { "cell_type": "code", - "execution_count": null, - "id": "8c9a2649", + "execution_count": 42, + "id": "758d93a5", "metadata": {}, "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "f0af3d01", - "metadata": {}, - "source": [ - "## Do the searching\n" - ] - }, - { - "cell_type": "code", - "execution_count": 131, - "id": "d7757948", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "num_results: 1, result_count: 0\n", - "num_results: 1, result_count: 1\n", - "num_results: 1, result_count: 1, total_result_count: 1\n" - ] - } - ], "source": [ "import time\n", "import pandas as pd\n", - "import json\n", "\n", "from ckanapi import RemoteCKAN\n", "from ckanapi.errors import CKANAPIError\n", "from requests.exceptions import ChunkedEncodingError\n", "from urllib3.exceptions import IncompleteRead\n", "\n", - "ua = \"ckanapiioos/1.0 (+https://ioos.us/)\"\n", - "\n", - "ioos_catalog = RemoteCKAN(\"https://data.ioos.us\", user_agent=ua)\n", - "\n", - "df_ioos_catalog = pd.DataFrame()\n", - "df_plat = pd.DataFrame()\n", - "\n", - "result_count = 0\n", - "\n", - "platforms = [\"Elliott Point\"]\n", - "orgs = [\"NANOOS\"]\n", - "\n", - "for org in orgs:\n", - " org_ncei = org.lower()\n", - "\n", - " for platform in platforms:\n", - "\n", - " platform_ncei = platform\n", - "\n", - " filter_query = f\"organization:{org_ncei.lower()}\"\n", - "\n", - " free_text_query = f\"{platform_ncei.lower()}\"\n", - "\n", - " # ioos_catalog.action.package_search(\n", - " # fq=filter_query, \n", - " # q=free_text_query, \n", - " # rows=500, \n", - " # start=result_count,\n", - " # )\n", - "\n", - " while True:\n", - " try:\n", - " datasets = ioos_catalog.action.package_search(\n", - " fq=filter_query, \n", - " q=free_text_query, \n", - " rows=500, \n", - " 
start=result_count,\n", - " )\n", - " except (CKANAPIError, IncompleteRead, ChunkedEncodingError):\n", - " continue\n", - "\n", - " #result_count = datasets.shape[0]\n", - "\n", - " num_results = datasets[\"count\"]\n", - " \n", - " print(f\"num_results: {num_results}, result_count: {result_count}\")\n", - "\n", - " for dataset in datasets[\"results\"]:\n", - " \n", - " # maybe just read all metadata into a DataFrame.\n", - " df = pd.DataFrame.from_dict(dataset, orient='index').T\n", - "\n", - " # for entry in dataset['extras']:\n", - " # if entry['key'] == 'temporal-extent-begin':\n", - " # start_date = entry['value']\n", - " # elif entry['key'] == 'temporal-extent-end':\n", - " # end_date= entry['value']\n", - " # elif entry['key'] == 'aggregation-info':\n", - " # my_list = json.loads(entry['value'])\n", - " # my_dict = {i: my_list[i] for i in range(len(my_list))}\n", - " # for agg in my_dict.keys():\n", - " # if my_dict[agg]['aggregate-dataset-identifier'] != \"\":\n", - " # dtype = my_dict[agg]['aggregate-dataset-identifier']\n", - " \n", - " # df = pd.DataFrame(\n", - " # {\n", - " # \"title\": [dataset[\"title\"]],\n", - " # \"url\": [dataset[\"resources\"][0][\"url\"]],\n", - " # \"org\": [dataset[\"organization\"][\"title\"]],\n", - " # \"platform\": platform_ncei,\n", - " # 'start_date':start_date,\n", - " # 'end_date':end_date,\n", - " # 'datatype': dtype,\n", - "\n", - " # }\n", - " # )\n", - "\n", - " df_plat = pd.concat([df_plat, df], ignore_index=True)\n", - " \n", - " result_count = df_plat.shape[0]\n", - "\n", - " if result_count >= num_results:\n", - " print(f\"num_results: {num_results}, result_count: {result_count}\")\n", - " break\n", - " \n", - " df_ioos_catalog = pd.concat([df_ioos_catalog, df_plat], ignore_index=True)\n", - "\n", - " print(\n", - " f\"num_results: {num_results}, result_count: {result_count}, total_result_count: {df_ioos_catalog.shape[0]}\"\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 132, - "id": 
"67f79caa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
authorauthor_emailcreator_user_ididisopenlicense_idlicense_titlemaintainermaintainer_emailmetadata_created...titletypeurlversionextrasresourcestagsgroupsrelationships_as_subjectrelationships_as_object
0NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75856b00a5-4c7d-4f8c-8244-e77fb85e793eFalseNoneNoneNoneNone2025-01-09T13:27:21.796623...(CMOP) Elliott PointdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
\n", - "

1 rows × 29 columns

\n", - "
" - ], - "text/plain": [ - " author author_email creator_user_id \\\n", - "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "\n", - " id isopen license_id license_title \\\n", - "0 856b00a5-4c7d-4f8c-8244-e77fb85e793e False None None \n", - "\n", - " maintainer maintainer_email metadata_created ... \\\n", - "0 None None 2025-01-09T13:27:21.796623 ... \n", - "\n", - " title type url version \\\n", - "0 (CMOP) Elliott Point dataset None None \n", - "\n", - " extras \\\n", - "0 [{'key': 'access-constraints', 'value': '[]'},... \n", - "\n", - " resources \\\n", - "0 [{'cache_last_updated': None, 'cache_url': Non... \n", - "\n", - " tags groups \\\n", - "0 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", - "\n", - " relationships_as_subject relationships_as_object \n", - "0 [] [] \n", - "\n", - "[1 rows x 29 columns]" - ] - }, - "execution_count": 132, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_ioos_catalog" - ] - }, - { - "cell_type": "code", - "execution_count": 130, - "id": "b9b2a4d8", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
authorauthor_emailcreator_user_ididisopenlicense_idlicense_titlemaintainermaintainer_emailmetadata_created...titletypeurlversionextrasresourcestagsgroupsrelationships_as_subjectrelationships_as_object
0NoneNone0ea3933c-4674-41dd-a17d-bfbc8c99bd75856b00a5-4c7d-4f8c-8244-e77fb85e793eFalseNoneNoneNoneNone2025-01-09T13:27:21.796623...(CMOP) Elliott PointdatasetNoneNone[{'key': 'access-constraints', 'value': '[]'},...[{'cache_last_updated': None, 'cache_url': Non...[{'display_name': 'Air Temperature', 'id': 'a6...[][][]
\n", - "

1 rows × 29 columns

\n", - "
" - ], - "text/plain": [ - " author author_email creator_user_id \\\n", - "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "\n", - " id isopen license_id license_title \\\n", - "0 856b00a5-4c7d-4f8c-8244-e77fb85e793e False None None \n", - "\n", - " maintainer maintainer_email metadata_created ... \\\n", - "0 None None 2025-01-09T13:27:21.796623 ... \n", - "\n", - " title type url version \\\n", - "0 (CMOP) Elliott Point dataset None None \n", - "\n", - " extras \\\n", - "0 [{'key': 'access-constraints', 'value': '[]'},... \n", - "\n", - " resources \\\n", - "0 [{'cache_last_updated': None, 'cache_url': Non... \n", - "\n", - " tags groups \\\n", - "0 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", - "\n", - " relationships_as_subject relationships_as_object \n", - "0 [] [] \n", - "\n", - "[1 rows x 29 columns]" - ] - }, - "execution_count": 130, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.DataFrame.from_dict(dataset, orient='index').T" - ] - }, - { - "cell_type": "markdown", - "id": "b77305da", - "metadata": {}, - "source": [ - "## Query IOOS Catalog for appropriate datasets\n", - "\n", - "Gather all the datasets associated with an RA and filter to just buoys and similar platforms." 
- ] - }, - { - "cell_type": "code", - "execution_count": 133, - "id": "12318daf", - "metadata": {}, - "outputs": [], - "source": [ "def ioos_ckan_query(ioos_catalog, filter_query, free_text_query):\n", " '''\n", " Function to query the IOOS catalog with a filter query and free text query.\n", @@ -621,33 +166,6 @@ "\n", " for dataset in datasets[\"results\"]:\n", " df = pd.DataFrame.from_dict(dataset, orient='index').T\n", - " # dtype = None\n", - " # for entry in dataset['extras']:\n", - " # if entry['key'] == 'temporal-extent-begin':\n", - " # start_date = entry['value']\n", - " # elif entry['key'] == 'temporal-extent-end':\n", - " # end_date= entry['value']\n", - " # elif entry['key'] == 'platform':\n", - " # platform = entry['value']\n", - " # elif entry['key'] == 'aggregation-info':\n", - " # my_list = json.loads(entry['value'])\n", - " # my_dict = {i: my_list[i] for i in range(len(my_list))}\n", - " # for agg in my_dict.keys():\n", - " # if my_dict[agg]['aggregate-dataset-identifier'] != \"\":\n", - " # dtype = my_dict[agg]['aggregate-dataset-identifier']\n", - "\n", - " # df = pd.DataFrame(\n", - " # {\n", - " # \"title\": [dataset[\"title\"]],\n", - " # #\"url\": [dataset[\"resources\"][0][\"url\"]],\n", - " # \"org\": [dataset[\"organization\"][\"title\"]],\n", - " # #\"platform\": platform,\n", - " # 'start_date':start_date,\n", - " # 'end_date':end_date,\n", - " # 'datatype': dtype,\n", - "\n", - " # }\n", - " # )\n", "\n", " df_plat = pd.concat([df_plat, df], ignore_index=True)\n", " \n", @@ -666,17 +184,9 @@ " return df_plat" ] }, - { - "cell_type": "markdown", - "id": "23812956", - "metadata": {}, - "source": [ - "## Actually do the querying" - ] - }, { "cell_type": "code", - "execution_count": 134, + "execution_count": null, "id": "80c3323e", "metadata": {}, "outputs": [ @@ -684,9 +194,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "num_results: 169, result_count: 0\n", - "num_results: 169, result_count: 169\n", - "num_results: 169, 
result_count: 169, total_result_count: 169\n" + "num_results: 95, result_count: 0\n", + "num_results: 95, result_count: 95\n", + "num_results: 95, result_count: 95, total_result_count: 95\n" ] }, { @@ -739,21 +249,21 @@ " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", - " 68a4c18a-ec0f-4c2d-9479-b96af1661f9c\n", + " 7da90e03-f8aa-483e-96d0-7a27051b90b4\n", " False\n", " None\n", " None\n", " None\n", " None\n", - " 2025-05-09T16:04:20.047386\n", + " 2025-04-11T14:32:17.541505\n", " ...\n", - " Glider - Trinidad Head Line: 2019 September - ...\n", + " Backyard Buoys - NANOOS - Washington: Quileute...\n", " dataset\n", " None\n", " None\n", " [{'key': 'access-constraints', 'value': '[]'},...\n", " [{'cache_last_updated': None, 'cache_url': Non...\n", - " [{'display_name': 'AUVS > Autonomous Underwate...\n", + " [{'display_name': 'Backyard Buoys', 'id': 'f80...\n", " []\n", " []\n", " []\n", @@ -763,21 +273,21 @@ " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", - " b369815e-03fc-4980-836f-d9b98b53ec0b\n", + " d0ef6a3a-4894-43f3-b4ea-2a882dccc478\n", " False\n", " None\n", " None\n", " None\n", " None\n", - " 2025-05-09T16:03:23.528924\n", + " 2025-01-09T02:08:28.700159\n", " ...\n", - " Glider - Trinidad Head Line: 2015 September - ...\n", + " NPBY1 - Point Wells: Meteorological Station Data\n", " dataset\n", " None\n", " None\n", " [{'key': 'access-constraints', 'value': '[]'},...\n", " [{'cache_last_updated': None, 'cache_url': Non...\n", - " [{'display_name': 'AUVS > Autonomous Underwate...\n", + " [{'display_name': 'Earth Science > Atmosphere ...\n", " []\n", " []\n", " []\n", @@ -787,21 +297,21 @@ " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", - " 7da90e03-f8aa-483e-96d0-7a27051b90b4\n", + " 0bd3b7ac-cb00-4dd5-87ca-e55bd6fb8d16\n", " False\n", " None\n", " None\n", " None\n", " None\n", - " 2025-04-11T14:32:17.541505\n", + " 2024-11-08T12:57:32.204016\n", " ...\n", - " Backyard Buoys - NANOOS - Washington: 
Quileute...\n", + " NPBY2 - Carr Inlet: Meteorological Station Data\n", " dataset\n", " None\n", " None\n", " [{'key': 'access-constraints', 'value': '[]'},...\n", " [{'cache_last_updated': None, 'cache_url': Non...\n", - " [{'display_name': 'Backyard Buoys', 'id': 'f80...\n", + " [{'display_name': 'Earth Science > Atmosphere ...\n", " []\n", " []\n", " []\n", @@ -811,21 +321,21 @@ " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", - " 703a2dae-6784-4317-9463-dfd2cdfa4d6c\n", + " 17f2f79e-bab0-4c2d-a0a1-2abf40acaa52\n", " False\n", " None\n", " None\n", " None\n", " None\n", - " 2025-05-09T16:03:49.832982\n", + " 2025-01-09T02:08:48.402290\n", " ...\n", - " Glider - La Push Line: 2025 March - Ongoing\n", + " NANOOS Mooring ORCA Pt Wells\n", " dataset\n", " None\n", " None\n", " [{'key': 'access-constraints', 'value': '[]'},...\n", " [{'cache_last_updated': None, 'cache_url': Non...\n", - " [{'display_name': 'AUVS > Autonomous Underwate...\n", + " [{'display_name': 'Air Temperature', 'id': 'a6...\n", " []\n", " []\n", " []\n", @@ -835,15 +345,15 @@ " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", - " d0ef6a3a-4894-43f3-b4ea-2a882dccc478\n", + " cb6612de-ae7b-4827-a1f9-0d943174ae15\n", " False\n", " None\n", " None\n", " None\n", " None\n", - " 2025-01-09T02:08:28.700159\n", + " 2025-05-09T16:03:29.657747\n", " ...\n", - " NPBY1 - Point Wells: Meteorological Station Data\n", + " NEMO - ChaBa Meteorlogical - Gill Metpak Pro\n", " dataset\n", " None\n", " None\n", @@ -879,7 +389,7 @@ " ...\n", " \n", " \n", - " 164\n", + " 90\n", " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", @@ -903,7 +413,7 @@ " []\n", " \n", " \n", - " 165\n", + " 91\n", " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", @@ -927,7 +437,7 @@ " []\n", " \n", " \n", - " 166\n", + " 92\n", " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", @@ -951,7 +461,7 @@ " []\n", " \n", " \n", - " 167\n", + " 93\n", " None\n", " None\n", " 
0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", @@ -975,7 +485,7 @@ " []\n", " \n", " \n", - " 168\n", + " 94\n", " None\n", " None\n", " 0ea3933c-4674-41dd-a17d-bfbc8c99bd75\n", @@ -1000,452 +510,1493 @@ " \n", " \n", "\n", - "

169 rows × 29 columns

\n", + "

95 rows × 29 columns

\n", "" ], "text/plain": [ - " author author_email creator_user_id \\\n", - "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "1 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "2 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "3 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "4 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - ".. ... ... ... \n", - "164 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "165 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "166 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "167 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", - "168 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + " author author_email creator_user_id \\\n", + "0 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "1 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "2 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "3 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "4 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + ".. ... ... ... \n", + "90 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "91 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "92 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "93 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", + "94 None None 0ea3933c-4674-41dd-a17d-bfbc8c99bd75 \n", "\n", - " id isopen license_id license_title \\\n", - "0 68a4c18a-ec0f-4c2d-9479-b96af1661f9c False None None \n", - "1 b369815e-03fc-4980-836f-d9b98b53ec0b False None None \n", - "2 7da90e03-f8aa-483e-96d0-7a27051b90b4 False None None \n", - "3 703a2dae-6784-4317-9463-dfd2cdfa4d6c False None None \n", - "4 d0ef6a3a-4894-43f3-b4ea-2a882dccc478 False None None \n", - ".. ... ... ... ... 
\n", - "164 bff06122-cf40-4611-b5b3-c8c79a71cfac False None None \n", - "165 2449dd5c-57c5-43dd-a3d6-f52de352a0e5 False None None \n", - "166 46917a4a-9e77-495b-a0d3-3c5cea2bc5e8 False None None \n", - "167 3261508c-5b1d-42a8-95ae-fe142449a216 False None None \n", - "168 92c7abed-b1f8-4827-94f5-888d0aa7858e False None None \n", + " id isopen license_id license_title \\\n", + "0 7da90e03-f8aa-483e-96d0-7a27051b90b4 False None None \n", + "1 d0ef6a3a-4894-43f3-b4ea-2a882dccc478 False None None \n", + "2 0bd3b7ac-cb00-4dd5-87ca-e55bd6fb8d16 False None None \n", + "3 17f2f79e-bab0-4c2d-a0a1-2abf40acaa52 False None None \n", + "4 cb6612de-ae7b-4827-a1f9-0d943174ae15 False None None \n", + ".. ... ... ... ... \n", + "90 bff06122-cf40-4611-b5b3-c8c79a71cfac False None None \n", + "91 2449dd5c-57c5-43dd-a3d6-f52de352a0e5 False None None \n", + "92 46917a4a-9e77-495b-a0d3-3c5cea2bc5e8 False None None \n", + "93 3261508c-5b1d-42a8-95ae-fe142449a216 False None None \n", + "94 92c7abed-b1f8-4827-94f5-888d0aa7858e False None None \n", "\n", - " maintainer maintainer_email metadata_created ... \\\n", - "0 None None 2025-05-09T16:04:20.047386 ... \n", - "1 None None 2025-05-09T16:03:23.528924 ... \n", - "2 None None 2025-04-11T14:32:17.541505 ... \n", - "3 None None 2025-05-09T16:03:49.832982 ... \n", - "4 None None 2025-01-09T02:08:28.700159 ... \n", - ".. ... ... ... ... \n", - "164 None None 2025-01-09T13:26:01.855237 ... \n", - "165 None None 2025-01-09T13:25:59.040273 ... \n", - "166 None None 2025-01-09T13:25:56.552833 ... \n", - "167 None None 2025-01-09T13:25:53.600691 ... \n", - "168 None None 2025-01-09T13:25:50.892710 ... \n", + " maintainer maintainer_email metadata_created ... \\\n", + "0 None None 2025-04-11T14:32:17.541505 ... \n", + "1 None None 2025-01-09T02:08:28.700159 ... \n", + "2 None None 2024-11-08T12:57:32.204016 ... \n", + "3 None None 2025-01-09T02:08:48.402290 ... \n", + "4 None None 2025-05-09T16:03:29.657747 ... \n", + ".. ... ... ... ... 
\n", + "90 None None 2025-01-09T13:26:01.855237 ... \n", + "91 None None 2025-01-09T13:25:59.040273 ... \n", + "92 None None 2025-01-09T13:25:56.552833 ... \n", + "93 None None 2025-01-09T13:25:53.600691 ... \n", + "94 None None 2025-01-09T13:25:50.892710 ... \n", "\n", - " title type url version \\\n", - "0 Glider - Trinidad Head Line: 2019 September - ... dataset None None \n", - "1 Glider - Trinidad Head Line: 2015 September - ... dataset None None \n", - "2 Backyard Buoys - NANOOS - Washington: Quileute... dataset None None \n", - "3 Glider - La Push Line: 2025 March - Ongoing dataset None None \n", - "4 NPBY1 - Point Wells: Meteorological Station Data dataset None None \n", - ".. ... ... ... ... \n", - "164 (APL-UW) Ćháʔba· UW/NANOOS Moore... dataset None None \n", - "165 (WADOH) Hood Canal 1 site, W shore of Hood Can... dataset None None \n", - "166 (CMOP) Grays Point (USCG day mark green 13) dataset None None \n", - "167 (WADOH) Skookum Inlet site, N shore near Deer ... dataset None None \n", - "168 (WADOH) Eld Inlet site, W shore near Frye Cove... dataset None None \n", + " title type url version \\\n", + "0 Backyard Buoys - NANOOS - Washington: Quileute... dataset None None \n", + "1 NPBY1 - Point Wells: Meteorological Station Data dataset None None \n", + "2 NPBY2 - Carr Inlet: Meteorological Station Data dataset None None \n", + "3 NANOOS Mooring ORCA Pt Wells dataset None None \n", + "4 NEMO - ChaBa Meteorlogical - Gill Metpak Pro dataset None None \n", + ".. ... ... ... ... \n", + "90 (APL-UW) Ćháʔba· UW/NANOOS Moore... dataset None None \n", + "91 (WADOH) Hood Canal 1 site, W shore of Hood Can... dataset None None \n", + "92 (CMOP) Grays Point (USCG day mark green 13) dataset None None \n", + "93 (WADOH) Skookum Inlet site, N shore near Deer ... dataset None None \n", + "94 (WADOH) Eld Inlet site, W shore near Frye Cove... dataset None None \n", "\n", - " extras \\\n", - "0 [{'key': 'access-constraints', 'value': '[]'},... 
\n", - "1 [{'key': 'access-constraints', 'value': '[]'},... \n", - "2 [{'key': 'access-constraints', 'value': '[]'},... \n", - "3 [{'key': 'access-constraints', 'value': '[]'},... \n", - "4 [{'key': 'access-constraints', 'value': '[]'},... \n", - ".. ... \n", - "164 [{'key': 'access-constraints', 'value': '[]'},... \n", - "165 [{'key': 'access-constraints', 'value': '[]'},... \n", - "166 [{'key': 'access-constraints', 'value': '[]'},... \n", - "167 [{'key': 'access-constraints', 'value': '[]'},... \n", - "168 [{'key': 'access-constraints', 'value': '[]'},... \n", + " extras \\\n", + "0 [{'key': 'access-constraints', 'value': '[]'},... \n", + "1 [{'key': 'access-constraints', 'value': '[]'},... \n", + "2 [{'key': 'access-constraints', 'value': '[]'},... \n", + "3 [{'key': 'access-constraints', 'value': '[]'},... \n", + "4 [{'key': 'access-constraints', 'value': '[]'},... \n", + ".. ... \n", + "90 [{'key': 'access-constraints', 'value': '[]'},... \n", + "91 [{'key': 'access-constraints', 'value': '[]'},... \n", + "92 [{'key': 'access-constraints', 'value': '[]'},... \n", + "93 [{'key': 'access-constraints', 'value': '[]'},... \n", + "94 [{'key': 'access-constraints', 'value': '[]'},... \n", "\n", - " resources \\\n", - "0 [{'cache_last_updated': None, 'cache_url': Non... \n", - "1 [{'cache_last_updated': None, 'cache_url': Non... \n", - "2 [{'cache_last_updated': None, 'cache_url': Non... \n", - "3 [{'cache_last_updated': None, 'cache_url': Non... \n", - "4 [{'cache_last_updated': None, 'cache_url': Non... \n", - ".. ... \n", - "164 [{'cache_last_updated': None, 'cache_url': Non... \n", - "165 [{'cache_last_updated': None, 'cache_url': Non... \n", - "166 [{'cache_last_updated': None, 'cache_url': Non... \n", - "167 [{'cache_last_updated': None, 'cache_url': Non... \n", - "168 [{'cache_last_updated': None, 'cache_url': Non... \n", + " resources \\\n", + "0 [{'cache_last_updated': None, 'cache_url': Non... \n", + "1 [{'cache_last_updated': None, 'cache_url': Non... 
\n", + "2 [{'cache_last_updated': None, 'cache_url': Non... \n", + "3 [{'cache_last_updated': None, 'cache_url': Non... \n", + "4 [{'cache_last_updated': None, 'cache_url': Non... \n", + ".. ... \n", + "90 [{'cache_last_updated': None, 'cache_url': Non... \n", + "91 [{'cache_last_updated': None, 'cache_url': Non... \n", + "92 [{'cache_last_updated': None, 'cache_url': Non... \n", + "93 [{'cache_last_updated': None, 'cache_url': Non... \n", + "94 [{'cache_last_updated': None, 'cache_url': Non... \n", "\n", - " tags groups \\\n", - "0 [{'display_name': 'AUVS > Autonomous Underwate... [] \n", - "1 [{'display_name': 'AUVS > Autonomous Underwate... [] \n", - "2 [{'display_name': 'Backyard Buoys', 'id': 'f80... [] \n", - "3 [{'display_name': 'AUVS > Autonomous Underwate... [] \n", - "4 [{'display_name': 'Earth Science > Atmosphere ... [] \n", - ".. ... ... \n", - "164 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", - "165 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", - "166 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", - "167 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", - "168 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + " tags groups \\\n", + "0 [{'display_name': 'Backyard Buoys', 'id': 'f80... [] \n", + "1 [{'display_name': 'Earth Science > Atmosphere ... [] \n", + "2 [{'display_name': 'Earth Science > Atmosphere ... [] \n", + "3 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "4 [{'display_name': 'Earth Science > Atmosphere ... [] \n", + ".. ... ... \n", + "90 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "91 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "92 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "93 [{'display_name': 'Air Temperature', 'id': 'a6... [] \n", + "94 [{'display_name': 'Air Temperature', 'id': 'a6... 
[] \n", "\n", - " relationships_as_subject relationships_as_object \n", - "0 [] [] \n", - "1 [] [] \n", - "2 [] [] \n", - "3 [] [] \n", - "4 [] [] \n", - ".. ... ... \n", - "164 [] [] \n", - "165 [] [] \n", - "166 [] [] \n", - "167 [] [] \n", - "168 [] [] \n", + " relationships_as_subject relationships_as_object \n", + "0 [] [] \n", + "1 [] [] \n", + "2 [] [] \n", + "3 [] [] \n", + "4 [] [] \n", + ".. ... ... \n", + "90 [] [] \n", + "91 [] [] \n", + "92 [] [] \n", + "93 [] [] \n", + "94 [] [] \n", "\n", - "[169 rows x 29 columns]" + "[95 rows x 29 columns]" ] }, - "execution_count": 134, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import time\n", - "import pandas as pd\n", - "\n", - "from ckanapi import RemoteCKAN\n", - "from ckanapi.errors import CKANAPIError\n", - "from requests.exceptions import ChunkedEncodingError\n", - "from urllib3.exceptions import IncompleteRead\n", "\n", "ua = \"ckanapiioos/1.0 (+https://ioos.us/)\"\n", "\n", "ioos_catalog = RemoteCKAN(\"https://data.ioos.us\", user_agent=ua)\n", "df_ioos_catalog = pd.DataFrame()\n", "\n", - "\n", - "platforms = [\"Elliott Point\"]\n", "orgs = [\"NANOOS\"]\n", "\n", "for org in orgs:\n", " org_ncei = org.lower()\n", "\n", - " for platform in platforms:\n", + " filter_query = f''\n", "\n", - " filter_query = f\"organization:{org_ncei.lower()}\"\n", + " free_text_query = f'organization:{org_ncei.lower()} NOT (glider OR model)'\n", "\n", - " free_text_query = f\"\"#{platform_ncei.lower()}\"\n", - " \n", - " df_ioos_catalog = pd.concat([df_ioos_catalog, ioos_ckan_query(ioos_catalog, filter_query, free_text_query)], ignore_index=True)\n", + " df_search = ioos_ckan_query(ioos_catalog, filter_query, free_text_query)\n", + "\n", + " df_ioos_catalog = pd.concat([df_ioos_catalog, df_search], ignore_index=True)\n", "\n", "df_ioos_catalog" ] }, { "cell_type": "markdown", - "id": "7197502f", + "id": "8607197e", "metadata": {}, "source": [ - "## Start filtering down to 
buoys and similar platforms" + "## Search NCEI" ] }, { "cell_type": "code", - "execution_count": 120, - "id": "a19d1160", + "execution_count": 50, + "id": "5434426a", + "metadata": {}, + "outputs": [], + "source": [ + "# fuzzy_xml_search.py\n", + "# This script performs a fuzzy search on the text content of an XML file.\n", + "\n", + "import xml.etree.ElementTree as ET\n", + "from thefuzz import fuzz\n", + "\n", + "def fuzzy_search_in_xml(tree, search_query, score_cutoff=70):\n", + " \"\"\"\n", + " Performs a fuzzy search for a query string within the text of all elements in an XML file.\n", + "\n", + " Args:\n", + " xml_file_path (str): The path to the XML file.\n", + " search_query (str): The string to search for.\n", + " score_cutoff (int): The minimum similarity score (0-100) to consider a match.\n", + " Defaults to 70.\n", + "\n", + " Returns:\n", + " list: A list of dictionaries, where each dictionary represents a match\n", + " and contains the element's tag, its text, and the similarity score.\n", + " Returns an empty list if no matches are found or if the file cannot be parsed.\n", + " \"\"\"\n", + " matches = []\n", + " try:\n", + " # Parse the XML file\n", + " #iso = _openurl_with_retry(url)\n", + " #tree = ET.parse(iso)\n", + " root = tree.getroot()\n", + "\n", + " # Iterate through every element in the XML tree\n", + " for element in root.iter():\n", + " # Check if the element has text content\n", + " if element.text and element.text.strip():\n", + " element_text = element.text.strip()\n", + " # Calculate the fuzzy match score (partial_ratio is good for finding substrings)\n", + " score = fuzz.partial_ratio(search_query.lower(), element_text.lower())\n", + "\n", + " # If the score is above the cutoff, we have a match\n", + " if score >= score_cutoff:\n", + " matches.append({\n", + " 'tag': element.tag,\n", + " 'text': element_text,\n", + " 'score': score\n", + " })\n", + "\n", + " except ET.ParseError as e:\n", + " print(f\"Error parsing XML file: 
{e}\")\n", + " except FileNotFoundError:\n", + " print(f\"Error: The file '{url}' was not found.\")\n", + " \n", + " # Sort matches by score in descending order\n", + " matches.sort(key=lambda x: x['score'], reverse=True)\n", + " \n", + " return matches" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3b5eeb1", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleorgstart_dateend_datedatatype
2Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2024-05-02T04:40:00Z2025-08-29T20:30:00ZTimeSeries
4NPBY1 - Point Wells: Meteorological Station DataNANOOS2014-09-30T23:42:13Z2025-09-05T14:50:39ZTimeSeries
6NPBY2 - Carr Inlet: Meteorological Station DataNANOOS2014-12-17T04:26:26Z2025-09-05T15:29:20ZTimeSeries
23NEMO - ChaBa Meteorlogical - Gill Metpak ProNANOOS2017-05-01T06:10:38Z2025-09-03T12:55:36ZTimeSeries
32Se'lhaem, Bellingham Bay Meteorological Statio...NANOOS2016-02-14T20:14:02Z2025-07-05T14:20:02ZTimeSeries
34ORCA3 - Hansville: Meteorological Station DataNANOOS2015-04-01T19:04:49Z2024-12-16T10:09:46ZTimeSeries
41ORCA1 - Twanoh: Meteorological Station DataNANOOS2019-09-01T00:00:31Z2025-09-05T15:25:47ZTimeSeries
50ORCA4 - Dabob Bay: Meteorological Station DataNANOOS2019-02-20T20:22:04Z2025-06-04T22:09:56ZTimeSeries
51Se'lhaem, Bellingham Bay Surface Hydrological ...NANOOS2016-02-14T20:20:02Z2025-07-05T14:20:02ZTimeSeries
54Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2023-10-19T20:20:00Z2023-12-24T22:50:00ZTimeSeries
55NEMO - Cha'Ba, CTD Data, Near-realtime (incomp...NANOOS2025-04-23T19:20:45Z2025-09-03T12:40:12ZTimeSeries
69Se'lhaem, Bellingham Bay Deep Hydrological Sta...NANOOS2007-01-01T00:10:02Z2024-11-20T02:40:02ZTimeSeries
72NEMO - Chaba Wind - Vaisala WMT 700 AnemometerNANOOS2016-10-26T20:00:32Z2025-09-03T12:46:25ZTimeSeries
76ORCA2 - Hoodsport: Meteorological Station DataNANOOS2023-01-13T20:51:23Z2025-09-05T15:26:57ZTimeSeries
85Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2025-04-23T21:30:00Z2025-08-29T22:30:00ZTimeSeries
89Backyard Buoys - NANOOS - Washington: Quileute...NANOOS2024-08-30T20:22:01Z2025-08-29T20:51:29ZTimeSeries
\n", - "
" - ], - "text/plain": [ - " title org \\\n", - "2 Backyard Buoys - NANOOS - Washington: Quileute... NANOOS \n", - "4 NPBY1 - Point Wells: Meteorological Station Data NANOOS \n", - "6 NPBY2 - Carr Inlet: Meteorological Station Data NANOOS \n", - "23 NEMO - ChaBa Meteorlogical - Gill Metpak Pro NANOOS \n", - "32 Se'lhaem, Bellingham Bay Meteorological Statio... NANOOS \n", - "34 ORCA3 - Hansville: Meteorological Station Data NANOOS \n", - "41 ORCA1 - Twanoh: Meteorological Station Data NANOOS \n", - "50 ORCA4 - Dabob Bay: Meteorological Station Data NANOOS \n", - "51 Se'lhaem, Bellingham Bay Surface Hydrological ... NANOOS \n", - "54 Backyard Buoys - NANOOS - Washington: Quileute... NANOOS \n", - "55 NEMO - Cha'Ba, CTD Data, Near-realtime (incomp... NANOOS \n", - "69 Se'lhaem, Bellingham Bay Deep Hydrological Sta... NANOOS \n", - "72 NEMO - Chaba Wind - Vaisala WMT 700 Anemometer NANOOS \n", - "76 ORCA2 - Hoodsport: Meteorological Station Data NANOOS \n", - "85 Backyard Buoys - NANOOS - Washington: Quileute... NANOOS \n", - "89 Backyard Buoys - NANOOS - Washington: Quileute... 
NANOOS \n", - "\n", - " start_date end_date datatype \n", - "2 2024-05-02T04:40:00Z 2025-08-29T20:30:00Z TimeSeries \n", - "4 2014-09-30T23:42:13Z 2025-09-05T14:50:39Z TimeSeries \n", - "6 2014-12-17T04:26:26Z 2025-09-05T15:29:20Z TimeSeries \n", - "23 2017-05-01T06:10:38Z 2025-09-03T12:55:36Z TimeSeries \n", - "32 2016-02-14T20:14:02Z 2025-07-05T14:20:02Z TimeSeries \n", - "34 2015-04-01T19:04:49Z 2024-12-16T10:09:46Z TimeSeries \n", - "41 2019-09-01T00:00:31Z 2025-09-05T15:25:47Z TimeSeries \n", - "50 2019-02-20T20:22:04Z 2025-06-04T22:09:56Z TimeSeries \n", - "51 2016-02-14T20:20:02Z 2025-07-05T14:20:02Z TimeSeries \n", - "54 2023-10-19T20:20:00Z 2023-12-24T22:50:00Z TimeSeries \n", - "55 2025-04-23T19:20:45Z 2025-09-03T12:40:12Z TimeSeries \n", - "69 2007-01-01T00:10:02Z 2024-11-20T02:40:02Z TimeSeries \n", - "72 2016-10-26T20:00:32Z 2025-09-03T12:46:25Z TimeSeries \n", - "76 2023-01-13T20:51:23Z 2025-09-05T15:26:57Z TimeSeries \n", - "85 2025-04-23T21:30:00Z 2025-08-29T22:30:00Z TimeSeries \n", - "89 2024-08-30T20:22:01Z 2025-08-29T20:51:29Z TimeSeries " - ] - }, - "execution_count": 120, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Searching for '(CMOP) SATURN 1' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 3 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 82\n", + "--------------------\n" + ] } ], "source": [ - "df_ioos_catalog.loc[df_ioos_catalog['datatype']=='TimeSeries']" + "# Example usage\n", + 
"organization = \"NANOOS\"\n", + "XML_FILE = f\"https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-{organization};view=xml;responseType=text/xml\"\n", + "SEARCH_QUERY = \"(CMOP) SATURN 1\" \n", + "SCORE_CUTOFF = 80 # Adjust this value to make the search more or less strict\n", + "\n", + "print(f\"Searching for '{SEARCH_QUERY}' in '{XML_FILE}' (cutoff score: {SCORE_CUTOFF})...\\n\")\n", + "\n", + "# Perform the search\n", + "results = fuzzy_search_in_xml(XML_FILE, SEARCH_QUERY, SCORE_CUTOFF)\n", + "\n", + "# Display the results\n", + "if results:\n", + " print(f\"Found {len(results)} match(es):\")\n", + " for result in results:\n", + " print(\"-\" * 20)\n", + " print(f\" Tag: {result['tag']}\")\n", + " print(f\" Text: '{result['text']}'\")\n", + " print(f\" Score: {result['score']}\")\n", + " print(\"-\" * 20)\n", + "else:\n", + " print(\"No matches found.\")" ] }, { "cell_type": "markdown", - "id": "3b6d6d90", + "id": "6a8eae87", "metadata": {}, "source": [ - "Note that CMOP Elliott Point does not appear in the timeseries filtered results." 
+ "## Bring it all together" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "d99addd1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "num_results: 95, result_count: 0\n", + "num_results: 95, result_count: 95\n", + "num_results: 95, result_count: 95, total_result_count: 95\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute - North' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NPBY1 - Point Wells: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NPBY2 - Carr Inlet: Meteorological Station Data' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Pt Wells' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NEMO - ChaBa Meteorlogical - Gill Metpak Pro' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 93\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'Se'lhaem, Bellingham Bay Meteorological Station Data' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'ORCA3 - Hansville: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'ORCA1 - Twanoh: Meteorological Station Data' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Dabobbay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'OBIS Data - Puget Sound Zooplankton Monitoring Program (Salish Sea, USA), starting in 2014' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmd}DS_InitiativeTypeCode\n", + " Text: 'program'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'ORCA4 - Dabob Bay: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + 
"Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'Se'lhaem, Bellingham Bay Surface Hydrological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute Test' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NEMO - Cha'Ba, CTD Data, Near-realtime (incomplete data)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", 
+ " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'time'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Twanoh' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Hoodsport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'Se'lhaem, Bellingham Bay Deep Hydrological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NEMO - Chaba Wind - Vaisala WMT 700 Anemometer' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for 'ORCA2 - Hoodsport: Meteorological Station Data' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 5 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological'\n", + " Score: 100\n", + 
"--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'meteorological sensor'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'biological'\n", + " Score: 80\n", + "--------------------\n", + "Searching for 'NANOOS Mooring CB-06' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute - Center' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA Hansville' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'NANOOS Mooring ORCA NPB2Carr' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for 'Backyard Buoys - NANOOS - Washington: Quileute - South' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-02' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-04' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-03' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(APL-UW) UW/NANOOS NEMO Subsurface profiler near La Push' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(PennCoveShellfish) Penn Cove Shellfish, Quilcene - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WhiskeyCrShelfish) PCSGA - Whiskey Creek Shellfish Hatchery, Netarts Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(PSI) PCSGA - Bay Center Port mooring, Willapa Bay' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) SATURN-07' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 
'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(SSNERR) SOSNSWQ Station - North Spit-BLM Boat Ramp near North Bend' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'North Head'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-09' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: 
{http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(WADOH) Burley Lagoon site, N end of Henderson Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for 'NANOOS Sensor Observation Service (SOS), a 52North IOOS SOS server' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(WADOH) Samish Bay site, mid bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOE) MCH01 Manchester/Clam Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOH) Pickering Passage site, E shore near Graham Point' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(OSU) NH-10 Buoy, Newport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-08 (LOBO Station)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", 
+ " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(NERRS) Station SOSECWQ - Elliot Creek, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'eliot'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Henderson Bay site, W shore' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOH) Hood Canal 7 site, N shore of Hood Canal near Tahuya' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(StillaguamishTribe) Port Susan buoy, Stillaguamish Tribe' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(APL-UW) LSG NPB-2 Profiling Buoy at Carr Inlet' 
in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Henderson Inlet site, S end' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(ORCA-UW) Profiling Buoy at Hoodsport - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(King County) Dockton Park Pier Mooring - Inner Quartermaster Harbor, Vashon Island' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOE) MUK01 Mukilteo' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(NERRS) Station SOSWIWQ - Winchester Arm, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(NWIC) Se`lhaem Bellingham Bay buoy' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 89\n", + "--------------------\n", + "Searching for '(NERRS SOS) Tom's Creek (sostcmet), South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(King County) Seattle Aquarium Mooring - Elliott Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'eliot'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Elliott Point' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Elliott Point'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'eliot'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Lower Sand Island light (USCG day mark green 5)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 3 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Lower Sand Island light (USCG day mark green 5)'\n", + " Score: 100\n", + 
"--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Marsh Island (USCG day mark green 21)'\n", + " Score: 81\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(FHL-UW) UW Friday Harbor Laboratories Cantilever Pt' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'CORIE'\n", + " Score: 80\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'abpoa'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Peale Passage site, W shore near Seafarm Cove' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(HMSC) Hatfield Marine Sci. Ctr. 
monitoring site, Newport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(NERRS) Station SOSCWQ - Charleston Bridge, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(PennCoveShellfish) Penn Cove Shellfish, Coupeville - Whidbey Island' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(Hakai Institute) Quadra Island Field Station at Hyacinthe Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Desdemona Sands Light' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Desdemona Sands Light'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Waste water outfall (City of Astoria)' in 
'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Waste water outfall (City of Astoria)'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(WADOH) Oakland Bay site, Rafts by Chapman Cove' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(OSU) CB-06, 6NM W of Coos Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) SATURN River Station 05 (LOBO Station)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN River Station 05'\n", + " Score: 100\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 8 site, S shore of Hood Canal near Twanoh' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(ORCA-UW) Profiling Buoy at Hansville - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) 
Tenasillahe Island (USFW dock)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Tenasillahe Island (USFW dock)'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Totten Inlet site, N end near Gallagher Cove' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(ORCA-UW) Profiling Buoy at Dabob Bay - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 9 site, N shore of Hood Canal near Belfair' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(VIU-CSR) VIU Deep Bay Marine Field Station, Baynes Sound' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(Hakai Institute) Seaology CO2 Buoy in Kwakshua Channel, BC' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 
match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) SATURN-01' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 13 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-08'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-10'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-07'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN-09'\n", + " Score: 89\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn08'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn10'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn01'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn02'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn03'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn04'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn05'\n", + " Score: 88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn07'\n", + " Score: 
88\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'saturn09'\n", + " Score: 88\n", + "--------------------\n", + "Searching for '(ORCA-UW) Profiling Buoy at Twanoh - Hood Canal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(APL-UW) LSG NPB-1 Profiling Buoy at Pt. Wells' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(CMOP) Jetty A' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'Jetty A'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'jetta'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Totten Inlet site, S end in Oyster Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) SATURN River Station 06 (USGS Station)' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", 
+ "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'SATURN River Station 05'\n", + " Score: 96\n", + "--------------------\n", + "Searching for '(NERRS) Station SOSVAWQ - Valino Island, South Slough Reserve' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Port Gamble site, E shore' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(King County) Point Williams buoy, South Seattle' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 5 site, S shore of Hood Canal near Hoodsport' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(WADOH) Hammersley Inlet site, S shore near Skookum Point' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(PSI) PCSGA - Nahcotta Port hatchery mooring, Willapa Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff 
score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(King County) Yacht Club Mooring - Inner Quartermaster Harbor, Vashon Island' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 1 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'sandi'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(TaylorShellfish) PCSGA - Taylor Shellfish Hatchery intakes, Dabob Bay' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(FannyBayOysters) Fanny Bay Oysters, Baynes Sound' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(APL-UW) Ćháʔba· UW/NANOOS Moored Buoy near La Push' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "Found 2 match(es):\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gco}CharacterString\n", + " Text: 'NANOOS'\n", + " Score: 100\n", + "--------------------\n", + " Tag: {http://www.isotc211.org/2005/gmx}Anchor\n", + " Text: 'BUOYS'\n", + " Score: 80\n", + "--------------------\n", + "Searching for '(WADOH) Hood Canal 1 site, W shore of Hood Canal near Case Shoal' in 'https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-NANOOS;view=xml;responseType=text/xml' (cutoff score: 80)...\n", + "\n", + "No matches found.\n", + "Searching for '(CMOP) Grays Point (USCG day mark green 13)' in 
from urllib.request import urlopen
import urllib.error
import stamina


@stamina.retry(on=urllib.error.HTTPError, attempts=3)
def _openurl_with_retry(url):
    """Open ``url`` with ``urlopen``, retrying on HTTP errors.

    stamina re-invokes the call (3 attempts in total) when it raises
    ``urllib.error.HTTPError``. Returns the response object produced by
    ``urlopen``; the caller is responsible for closing it.
    """
    return urlopen(url)


ua = "ckanapiioos/1.0 (+https://ioos.us/)"

ioos_catalog = RemoteCKAN("https://data.ioos.us", user_agent=ua)

orgs = ["NANOOS"]

# Minimum fuzzy-match score for a hit; adjust this value to make the search
# more or less strict. Loop-invariant, so it is set once here.
SCORE_CUTOFF = 80

# Collect one search result frame per organization and concatenate once after
# the loop, instead of growing a DataFrame (starting from an empty one) on
# every iteration.
catalog_frames = []

for org in orgs:
    # Assign the organization *before* it is used to build the NCEI URL.
    # Previously it was only set inside the dataset loop below, so a fresh
    # kernel hit a NameError here, and a second entry in ``orgs`` would have
    # fetched the previous organization's NCEI record.
    organization = org
    org_ncei = org.lower()

    filter_query = ""

    free_text_query = f"organization:{org_ncei} NOT (glider OR model)"

    # ioos_ckan_query is defined in an earlier cell (not visible in this chunk).
    df_search = ioos_ckan_query(ioos_catalog, filter_query, free_text_query)

    catalog_frames.append(df_search)

    XML_FILE = f"https://www.ncei.noaa.gov/access/metadata/landing-page/bin/iso?id=gov.noaa.nodc:IOOS-{organization};view=xml;responseType=text/xml"

    # Fetch and parse the ISO record once per organization; the context
    # manager closes the HTTP response as soon as parsing is done.
    # NOTE(review): ET is imported in an earlier cell -- presumably
    # xml.etree.ElementTree; confirm.
    with _openurl_with_retry(XML_FILE) as iso:
        tree = ET.parse(iso)

    for index, dataset in df_search.iterrows():
        SEARCH_QUERY = dataset["title"]

        print(f"Searching for '{SEARCH_QUERY}' in '{XML_FILE}' (cutoff score: {SCORE_CUTOFF})...\n")

        # Perform the search (fuzzy_search_in_xml is defined in an earlier cell).
        results = fuzzy_search_in_xml(tree, SEARCH_QUERY, SCORE_CUTOFF)

        # Display the results
        if results:
            print(f"Found {len(results)} match(es):")
            for result in results:
                print("-" * 20)
                print(f" Tag: {result['tag']}")
                print(f" Text: '{result['text']}'")
                print(f" Score: {result['score']}")
            print("-" * 20)
        else:
            print("No matches found.")

# Combined catalog search results for every organization processed above.
df_ioos_catalog = (
    pd.concat(catalog_frames, ignore_index=True) if catalog_frames else pd.DataFrame()
)

# --- next notebook cell ---
# Titles returned by the most recent catalog search (rich display in Jupyter).
df_search["title"]
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titleorgstart_dateend_datedatatype
128(CMOP) Elliott PointNANOOS2018-01-02T08:33:43+00:002019-02-10T21:27:42+00:00None
\n", - "
" - ], "text/plain": [ - " title org start_date \\\n", - "128 (CMOP) Elliott Point NANOOS 2018-01-02T08:33:43+00:00 \n", - "\n", - " end_date datatype \n", - "128 2019-02-10T21:27:42+00:00 None " + "0 Backyard Buoys - NANOOS - Washington: Quileute...\n", + "1 NPBY1 - Point Wells: Meteorological Station Data\n", + "2 NPBY2 - Carr Inlet: Meteorological Station Data\n", + "3 NANOOS Mooring ORCA Pt Wells\n", + "4 NEMO - ChaBa Meteorlogical - Gill Metpak Pro\n", + " ... \n", + "90 (APL-UW) Ćháʔba· UW/NANOOS Moore...\n", + "91 (WADOH) Hood Canal 1 site, W shore of Hood Can...\n", + "92 (CMOP) Grays Point (USCG day mark green 13)\n", + "93 (WADOH) Skookum Inlet site, N shore near Deer ...\n", + "94 (WADOH) Eld Inlet site, W shore near Frye Cove...\n", + "Name: title, Length: 95, dtype: object" ] }, - "execution_count": 117, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df_ioos_catalog.loc[df_ioos_catalog['title'].str.contains('Elliott Point')]" + "df_search['title']" ] }, { "cell_type": "code", "execution_count": null, - "id": "8607197e", + "id": "d629e788", "metadata": {}, "outputs": [], "source": []