import os

from datetime import datetime, timedelta, timezone
from time import strftime, gmtime, sleep

import requests
import xarray as xr
from bs4 import BeautifulSoup
from bs4.element import Tag
from fastapi import FastAPI
from numpy import arctan2, degrees, isnan
from requests.compat import urljoin, urlparse

app = FastAPI()


@app.get("/download/")
def start_download():
    """
    Download the most recent available GFS model files from ncep.noaa.gov.

    Scrapes the NCO GFS product page for the GRIB2 base URL, picks the most
    recent ``gfs.YYYYMMDD/`` product directory (today in UTC, falling back to
    the previous day), selects the latest model-run hour, verifies the run is
    fully published, and downloads the gridded wave product for one forecast
    hour.

    :raises RuntimeError: if no recent product directory exists, or the
        latest run is not fully published yet.
    """
    soup = BeautifulSoup(
        requests.get("https://www.nco.ncep.noaa.gov/pmb/products/gfs/").text,
        "html.parser",
    )
    base_url = (
        soup.find("font", string="Most commonly used parameters")
        .parent.find_next_sibling("td")
        .find("a", string="Available in GRIB2 via https")["href"]
    )
    prodlist = BeautifulSoup(
        requests.get("https://nomads.ncep.noaa.gov/pub/data/nccf/com/gfs/prod").text,
        "html.parser",
    )
    # TODO[2] resolved: try today's directory first, then gracefully fall
    # back to the previous UTC day instead of a hard-coded date.
    now = datetime.now(timezone.utc)
    date_link = None
    for day in (now, now - timedelta(days=1)):
        date_link = prodlist.find("a", string=f"gfs.{day:%Y%m%d}/")
        if date_link is not None:
            break
    if date_link is None:
        raise RuntimeError("no recent gfs.YYYYMMDD/ product directory found")
    date_url = urljoin(base=base_url + "/", url=date_link["href"])

    hour_list = BeautifulSoup(requests.get(date_url).text, "html.parser")
    # Pick the numerically largest run-hour link (e.g. "18/"); non-numeric
    # links sort last via pull_text's -1 sentinel.
    latest_link = max(hour_list.findAll("a"), key=pull_text)
    latest_hour = latest_link.text.removesuffix(
        "/"
    )  # TODO: this may be unavailable still, fallback to previous hour
    hour_url = urljoin(base=date_url, url=latest_link["href"])

    atmos_url = urljoin(base=hour_url, url="atmos/")

    prod_hour = "004"  # TODO[1]: do it for 000 - 384 forecast hours of product
    gfs_url = urljoin(
        base=atmos_url,
        url=f"gfs.t{latest_hour}z.pgrb2.0p25.f{prod_hour}",
    )

    # A run is considered complete once its final forecast hour (f384)
    # exists; check .ok explicitly instead of relying on Response.__bool__.
    if not requests.head(
        urljoin(base=atmos_url, url=f"gfs.t{latest_hour}z.pgrb2.0p25.f384")
    ).ok:
        raise RuntimeError("unavailable still, fallback to previous hour")

    # print(download_file(gfs_url))
    wave_url = urljoin(base=hour_url, url="wave/gridded/")
    gfswave_url = urljoin(
        base=wave_url,
        url=f"gfswave.t{latest_hour}z.global.0p25.f{prod_hour}.grib2",
    )

    if not requests.head(
        urljoin(
            base=gfswave_url,
            url=f"gfswave.t{latest_hour}z.global.0p25.f384.grib2",
        )
    ).ok:
        raise RuntimeError("unavailable still, fallback to previous hour")

    print(download_file(gfswave_url))


def pull_text(tag: Tag) -> int:
    """Sort key for directory links: the integer value of the tag's text
    (e.g. "18/" -> 18), or -1 when the text is not a number."""
    try:
        return int(tag.text.removesuffix("/"))
    except ValueError:
        return -1
def download_file(url: str, file_path="/tmp/grib/", attempts=2):
    """Downloads a URL content into a file (with large file support by streaming)

    :param url: URL to download
    :param file_path: Local file name to contain the data downloaded; a
        directory (existing, or a path with a trailing separator) receives the
        URL's basename; empty means the URL's basename in the working dir
    :param attempts: Number of attempts
    :return: New file path. Empty string if the download failed
    """
    if not file_path:
        file_path = os.path.realpath(os.path.basename(url))
    if file_path.endswith(os.sep) or os.path.isdir(file_path):
        # Target is a directory: create it if missing (the original crashed
        # on a fresh machine where /tmp/grib/ did not exist yet), then
        # append the URL's basename.
        os.makedirs(file_path, exist_ok=True)
        file_path = os.path.join(file_path, os.path.basename(url))
    # logger.info(f'Downloading {url} content to {file_path}')
    url_sections = urlparse(url)
    if not url_sections.scheme:
        # The given url is missing a scheme; default to plain http.
        url = f"http://{url}"
    for attempt in range(1, attempts + 1):
        try:
            if attempt > 1:
                sleep(10)  # 10 seconds wait time between retries
            with requests.get(url, stream=True) as response:
                response.raise_for_status()
                with open(file_path, "wb") as out_file:
                    for chunk in response.iter_content(
                        chunk_size=1024 * 1024 * 8
                    ):  # 8MB chunks
                        out_file.write(chunk)
            # logger.info('Download finished successfully')
            return file_path
        except Exception as ex:
            # Best-effort endpoint: report the failure and retry until the
            # attempt budget is exhausted.
            print(ex)
            # logger.error(f'Attempt #{attempt} failed with error: {ex}')
    return ""
@app.get("/weather_dot/")
def weather_dot(lat: float, lon: float, prediction_time: str = "000"):
    """
    Return a weather summary for a single GFS grid point.

    Reads the locally downloaded GFS / GFS-wave GRIB2 files under
    ``/tmp/grib/`` (see the download endpoint) and extracts wind,
    temperature, pressure, cloud, precipitation and wave values at
    (lat, lon). The float annotations on lat/lon make FastAPI coerce the
    query parameters — without them they arrive as strings and break
    ``ds.sel``.

    :param lat: latitude of the point, in degrees
    :param lon: longitude of the point, in degrees — presumably on the
        0..360 GFS grid; TODO confirm against the downloaded files
    :param prediction_time: three-digit forecast hour ("000" = analysis)
    :return: dict of weather values; wave keys are omitted over land
    """
    latest_hour = "18"  # TODO[3]: global? singleton? make user choose?
    gfs_path = f"/tmp/grib/gfs.t{latest_hour}z.pgrb2.0p25.f{prediction_time}"
    wave_path = (
        f"/tmp/grib/gfswave.t{latest_hour}z.global.0p25.f{prediction_time}.grib2"
    )

    def _dot(path, **filter_by_keys):
        """Open *path* with cfgrib (optionally filtered) and return the
        point at (lat, lon), loaded eagerly so its values remain usable
        after the file is closed."""
        kwargs = {"filter_by_keys": filter_by_keys} if filter_by_keys else {}
        with xr.open_dataset(path, engine="cfgrib", **kwargs) as ds:
            # TODO[0]: Do we want method="nearest" or exact here
            return ds.sel(latitude=lat, longitude=lon).load()

    response = {}

    wind_dot = _dot(gfs_path, typeOfLevel="heightAboveGround", level=10)
    u_wind = wind_dot.get("u10").item()
    v_wind = wind_dot.get("v10").item()
    response.update(
        u_wind=u_wind,
        v_wind=v_wind,
        # http://colaweb.gmu.edu/dev/clim301/lectures/wind/wind-uv
        wind_speed=(u_wind**2 + v_wind**2) ** 0.5,
        wind_alpha=degrees(arctan2(v_wind, u_wind)),
    )

    two_dot = _dot(gfs_path, typeOfLevel="heightAboveGround", level=2)
    response.update(
        temperature=two_dot.get("t2m").item(),
        dew_point=two_dot.get("d2m").item(),
        humidity=two_dot.get("r2").item(),
    )
    is_forecast = int(prediction_time) > 0
    if is_forecast:
        # min/max temperature only exist for forecast steps, not f000
        response.update(
            min_temp=two_dot.get("tmin").item(),
            max_temp=two_dot.get("tmax").item(),
        )

    sea_dot = _dot(gfs_path, typeOfLevel="meanSea")
    # prmsl is in Pa; dividing by 1000 yields kPa (the previous comment
    # said GPa, which was off by six orders of magnitude)
    response.update(pressure=sea_dot.get("prmsl").item() / 1000)

    # GRIB field layout differs between the analysis (f000) and the
    # forecast steps, hence the two branches below.
    if is_forecast:
        surface_dot = _dot(gfs_path, typeOfLevel="surface", stepType="instant")
        response.update(
            visibility=surface_dot.get("vis").item() / 1000,  # in km
            wind_gust=surface_dot.get("gust").item(),
            frozen_precip=surface_dot.get("cpofp").item(),
        )

        accum_dot = _dot(gfs_path, typeOfLevel="surface", stepType="accum")
        response.update(total_precip=accum_dot.get("tp").item())

        low_cloud_dot = _dot(gfs_path, typeOfLevel="lowCloudLayer", stepType="instant")
        response.update(low_clouds=low_cloud_dot.get("lcc").item())

        total_cloud_dot = _dot(gfs_path, typeOfLevel="atmosphere", stepType="instant")
        response.update(total_clouds=total_cloud_dot.get("tcc").item())

        wave_dot = _dot(wave_path)
        # NaN wave height marks a point over land — skip wave fields there
        if not isnan(wave_dot.get("swh").item()):  # check one for swell waves
            response.update(
                wind_and_swell_wave_height=wave_dot.get("swh").item(),
                swell_wave_direction=wave_dot.get("swdir").values.tolist(),
                swell_wave_period=wave_dot.get("swper").values.tolist(),
            )
        if not isnan(wave_dot.get("shww").item()):  # and one for wind waves
            response.update(
                wind_wave_height=wave_dot.get("shww").item(),
                wind_wave_direction=wave_dot.get("wvdir").item(),
                wind_wave_period=wave_dot.get("mpww").item(),
            )
    else:
        surface_dot = _dot(gfs_path, typeOfLevel="surface")
        response.update(
            visibility=surface_dot.get("vis").item() / 1000,  # in km
            wind_gust=surface_dot.get("gust").item(),
            frozen_precip=surface_dot.get("cpofp").item(),
        )

        low_cloud_dot = _dot(gfs_path, typeOfLevel="lowCloudLayer")
        response.update(low_clouds=low_cloud_dot.get("lcc").item())

        total_cloud_dot = _dot(gfs_path, typeOfLevel="atmosphere")
        response.update(total_clouds=total_cloud_dot.get("tcc").item())

        wave_dot = _dot(wave_path)
        if not isnan(
            wave_dot.get("swh").item()
        ):  # just check one, should be enough to tell if it is water
            response.update(
                wind_and_swell_wave_height=wave_dot.get("swh").item(),
                wind_wave_height=wave_dot.get("shww").item(),
                wind_wave_direction=wave_dot.get("wvdir").item(),
                swell_wave_direction=wave_dot.get("swdir").values.tolist(),
                swell_wave_period=wave_dot.get("swper").values.tolist(),
                wind_wave_period=wave_dot.get("mpww").item(),
            )

    return response
b/noaa.code-workspace @@ -0,0 +1,15 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": { + "python.defaultInterpreterPath": ".venv/bin/python", + "python.formatting.blackPath": "/usr/bin/black", + "python.linting.flake8Path": "/usr/bin/flake8", + "python.linting.flake8Args": [ + "--black-config=''" + ] + } +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..55a4682 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,26 @@ +anyio==3.5.0 +attrs==21.4.0 +beautifulsoup4==4.11.1 +bs4==0.0.1 +cffi==1.15.0 +cfgrib==0.9.10.1 +click==8.1.3 +eccodes==1.4.1 +fastapi==0.76.0 +findlibs==0.0.2 +idna==3.3 +numpy==1.22.3 +packaging==21.3 +pandas==1.4.2 +pycparser==2.21 +pydantic==1.9.0 +pyparsing==3.0.8 +python-dateutil==2.8.2 +pytz==2022.1 +requests==2.27.1 +six==1.16.0 +sniffio==1.2.0 +soupsieve==2.3.2.post1 +starlette==0.18.0 +typing_extensions==4.2.0 +xarray==2022.3.0