commit
c19264f6b8
@ -0,0 +1,5 @@
|
||||
tiles/
|
||||
geojson/
|
||||
geojson2024/
|
||||
nohup.out
|
||||
geojsons.tar.gz
|
||||
@ -0,0 +1,43 @@
|
||||
from datetime import date, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from curl_cffi import requests
|
||||
|
||||
# from tqdm import tqdm
|
||||
|
||||
# Days to request: start_date inclusive, end_date exclusive.
start_date = date(2024, 1, 1)
end_date = date(2025, 1, 1)
delta = end_date - start_date
dates = [
    (start_date + timedelta(days=offset)).isoformat()
    for offset in range(delta.days)
]

# Tile addressing: one zoom level and a fixed grid of x/y tile indices.
z = 8
xs = [153, 154, 155, 156]
ys = [79, 80]
hours = range(24)

# One request description per (day, hour, x, y); y varies fastest.
vars = [
    dict(z=z, x=tile_x, y=tile_y, hour=hour_of_day, date=day)
    for day in dates
    for hour_of_day in hours
    for tile_x in xs
    for tile_y in ys
]
print(len(vars))

# Downloaded tiles are written into this directory.
Path("./tiles").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
def write_tiles(v):
    """Download one vector tile and store it under ``./tiles``.

    Args:
        v: Mapping with the keys ``"date"``, ``"hour"``, ``"z"``, ``"x"``
            and ``"y"`` describing the tile to request.

    Returns:
        None. On a 2xx response the body is saved as
        ``./tiles/{date}_{hour}_{z}_{x}_{y}.pbf``. Any other status is
        reported on stdout and nothing is written.
    """
    date = v["date"]
    hour = v["hour"]
    z = v["z"]
    x = v["x"]
    y = v["y"]

    url = (
        "https://prodvizhenie.mos.ru/tiles/v1/taxi/ride-start/"
        f"{z}/{x}/{y}.pbf?hours={hour},{hour}"
        f"&weekdays=1,2,3,4,5,6,7&dates={date},{date}&aggregation=h3"
    )
    r = requests.get(url, impersonate="chrome")

    # An error body is not tile data; saving it would leave a bogus .pbf
    # file on disk, so the tile is skipped instead.
    if not 200 <= r.status_code < 300:
        print(f"skipped {url}: HTTP {r.status_code}")
        return

    with open(Path(f"./tiles/{date}_{hour}_{z}_{x}_{y}.pbf"), "wb") as f:
        f.write(r.content)
|
||||
|
||||
|
||||
# Fetch every requested tile in order. A plain loop is used because the calls
# are made only for their side effects; no result list is needed.
for v in vars:
    write_tiles(v)
|
||||
@ -0,0 +1,25 @@
|
||||
import os
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# from tqdm import tqdm
|
||||
|
||||
# os.listdir returns entries in arbitrary order; sorting makes the files be
# processed in the same order on every run.
jsons = sorted(os.listdir("geojson"))
|
||||
|
||||
|
||||
def read_properties(json_filename):
    """Load one properties file from ``geojson/`` into a DataFrame.

    The first two ``_``-separated parts of the file name (without its
    extension) are attached to every row as the ``date`` and ``hour``
    columns, both kept as strings. The ``address`` column is removed when
    the file has one.

    Args:
        json_filename: Name of a file inside the ``geojson`` directory.

    Returns:
        DataFrame with the file's records plus ``date`` and ``hour``.
    """
    df_json = pd.read_json(f"geojson/{json_filename}")
    filename_parts = os.path.splitext(json_filename)[0].split("_")
    df_json["date"] = filename_parts[0]
    df_json["hour"] = filename_parts[1]
    # errors="ignore": a file whose records carry no address must not abort
    # the whole aggregation with a KeyError.
    return df_json.drop(columns="address", errors="ignore")
|
||||
|
||||
|
||||
# Read the listed files one at a time and stack their rows into one frame.
dfs = map(read_properties, jsons)
df = pd.concat(dfs, ignore_index=True)
print(df.shape)

# Collapse fully identical rows, then persist the result.
clean_df = df.drop_duplicates(keep="first")
print(clean_df.shape)
clean_df.to_parquet("2023.parquet", index=False)

print("finish")
|
||||
@ -0,0 +1,28 @@
|
||||
import json
|
||||
import os
|
||||
|
||||
import mapbox_vector_tile
|
||||
from tqdm import tqdm
|
||||
|
||||
# Only tile files whose name begins with "2024" are converted.
tiles = [name for name in os.listdir("tiles") if name.startswith("2024")]
|
||||
|
||||
|
||||
def to_geojson(tile):
    """Extract the feature properties of one tile and save them as JSON.

    Reads ``tiles/{tile}``, decodes it with ``mapbox_vector_tile`` and writes
    the ``properties`` of every feature in the ``"travel"`` layer, as a JSON
    list, to ``geojson2024/{name}.geojson`` where ``name`` is ``tile``
    without its extension.

    Args:
        tile: File name (not a path) of a tile inside the ``tiles`` directory.

    Returns:
        None. Nothing is written when the tile has no ``"travel"`` layer or
        the layer has no features.
    """
    with open(f"tiles/{tile}", "rb") as f:
        pbf = f.read()

    decoded = mapbox_vector_tile.decode(pbf)
    # Look the layer up defensively: indexing decoded["travel"] raises
    # KeyError for a tile that was decoded without that layer.
    features = decoded.get("travel", {}).get("features", [])
    if not features:
        return

    features_properties = [feature["properties"] for feature in features]

    filename = os.path.splitext(tile)[0]
    # Create the output directory on demand so a fresh checkout works.
    os.makedirs("geojson2024", exist_ok=True)
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is pinned instead of depending on the locale default.
    with open(f"geojson2024/{filename}.geojson", "w", encoding="utf-8") as f:
        json.dump(features_properties, f, ensure_ascii=False)
|
||||
|
||||
|
||||
# Convert every selected tile. A plain loop is used because to_geojson is
# called only for its side effects; no result list is needed.
for t in tiles:
    to_geojson(t)

print("end")
|
||||
Loading…
Reference in new issue