first commit

master
g 10 months ago
commit c19264f6b8

5
.gitignore vendored

@ -0,0 +1,5 @@
tiles/
geojson/
geojson2024/
nohup.out
geojsons.tar.gz

@ -0,0 +1,43 @@
from datetime import date, timedelta
from pathlib import Path
from curl_cffi import requests
# from tqdm import tqdm
# Download window: every calendar day from start_date up to, but not
# including, end_date.
start_date = date(2024, 1, 1)
end_date = date(2025, 1, 1)
delta = end_date - start_date
dates = [
    (start_date + timedelta(days=offset)).isoformat()
    for offset in range(delta.days)
]

# Tile grid to request: a single zoom level and a 4 x 2 block of x/y indices,
# fetched separately for each hour of the day.
z = 8
xs = [153, 154, 155, 156]
ys = [79, 80]
hours = range(24)

# One request descriptor per (date, hour, x, y) combination; y varies
# fastest and the date slowest.
vars = [
    {"z": z, "x": tile_x, "y": tile_y, "hour": hour_of_day, "date": day}
    for day in dates
    for hour_of_day in hours
    for tile_x in xs
    for tile_y in ys
]
print(len(vars))

# Directory the downloaded tiles are written to.
Path("./tiles").mkdir(parents=True, exist_ok=True)
def write_tiles(v):
    """Download one vector tile and store it under ``./tiles``.

    Args:
        v: Mapping with the keys ``"z"``, ``"x"``, ``"y"``, ``"hour"`` and
            ``"date"`` that select the tile and the time slice to request.

    Returns:
        None. On a non-error response the body is written to
        ``./tiles/{date}_{hour}_{z}_{x}_{y}.pbf``; on an HTTP error status
        (>= 400) a message is printed and no file is written.
    """
    # Named ``day`` so the ``date`` class imported at module level is not
    # shadowed inside the function.
    day = v["date"]
    hour = v["hour"]
    z = v["z"]
    x = v["x"]
    y = v["y"]
    url = f"https://prodvizhenie.mos.ru/tiles/v1/taxi/ride-start/{z}/{x}/{y}.pbf?hours={hour},{hour}&weekdays=1,2,3,4,5,6,7&dates={day},{day}&aggregation=h3"
    r = requests.get(url, impersonate="chrome")
    if r.status_code >= 400:
        # The body of an error response is not tile data; saving it would
        # leave a bogus .pbf file behind for later processing steps.
        print(f"skipped {url}: HTTP {r.status_code}")
        return
    Path(f"./tiles/{day}_{hour}_{z}_{x}_{y}.pbf").write_bytes(r.content)
# Fetch every requested tile in order. A plain loop is used because the calls
# are made only for their side effects; a list comprehension would build a
# throwaway list holding one None per request.
for v in vars:
    write_tiles(v)

@ -0,0 +1,25 @@
import os
import pandas as pd
# from tqdm import tqdm
# os.listdir returns entries in arbitrary order; sorting makes the order in
# which the files are read (and therefore the row order of the combined
# output) reproducible between runs.
jsons = sorted(os.listdir("geojson"))
def read_properties(json_filename):
    """Load one file from ``geojson/`` into a DataFrame tagged with its slot.

    The file name (without extension) is split on ``"_"``; the first part is
    stored in a ``date`` column and the second in an ``hour`` column, both as
    strings. The ``address`` column is removed from the result.

    Args:
        json_filename: Name of a file inside the ``geojson`` directory.

    Returns:
        The DataFrame read from the file, plus ``date`` and ``hour`` columns
        and without ``address``.
    """
    stem, _extension = os.path.splitext(json_filename)
    name_fields = stem.split("_")

    frame = pd.read_json(f"geojson/{json_filename}")
    frame["date"] = name_fields[0]
    frame["hour"] = name_fields[1]
    return frame.drop(columns="address")
# Lazily load each listed file and stack all rows into a single frame.
dfs = map(read_properties, jsons)
df = pd.concat(dfs, ignore_index=True)
print(df.shape)

# Remove rows that are exact duplicates and show the resulting size.
clean_df = df.drop_duplicates()
print(clean_df.shape)

# Persist the de-duplicated table without the index column.
clean_df.to_parquet("2023.parquet", index=False)
print("finish")

@ -0,0 +1,28 @@
import json
import os
import mapbox_vector_tile
from tqdm import tqdm
# Only entries of the tiles directory whose name starts with "2024" are
# converted.
tiles = [name for name in os.listdir("tiles") if name.startswith("2024")]
def to_geojson(tile):
    """Decode one ``.pbf`` tile and save its feature properties as JSON.

    Reads ``tiles/{tile}``, decodes it with ``mapbox_vector_tile`` and writes
    the ``properties`` of every feature in the ``travel`` layer, as a JSON
    list, to ``geojson2024/{stem}.geojson`` where ``stem`` is the tile file
    name without its extension. Nothing is written when the layer is absent
    or has no features.

    Args:
        tile: File name (not a path) of a tile inside the ``tiles`` directory.

    Returns:
        None.
    """
    with open(f"tiles/{tile}", "rb") as src:
        pbf = src.read()
    decoded = mapbox_vector_tile.decode(pbf)

    # A tile may decode without a "travel" layer at all; treat that the same
    # as a layer with zero features instead of failing with KeyError.
    features = decoded.get("travel", {}).get("features", [])
    if not features:
        return

    features_properties = [feature["properties"] for feature in features]
    filename = os.path.splitext(tile)[0]

    # The output directory is not created anywhere else in this script.
    os.makedirs("geojson2024", exist_ok=True)
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 rather than left to the platform default.
    with open(f"geojson2024/{filename}.geojson", "w", encoding="utf-8") as dst:
        json.dump(features_properties, dst, ensure_ascii=False)
# Convert every selected tile. A plain loop is used because the calls are made
# only for their side effects; a list comprehension would build a throwaway
# list of None values.
for t in tiles:
    to_geojson(t)
print("end")
Loading…
Cancel
Save