You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

26 lines
622 B

import os
import pandas as pd
# from tqdm import tqdm
# Names of every entry in the "geojson" directory. Sorted because
# os.listdir() returns entries in arbitrary order; a stable order keeps the
# row order of the concatenated table reproducible between runs.
jsons = sorted(os.listdir("geojson"))
def read_properties(json_filename: str) -> pd.DataFrame:
    """Load one file from ``geojson/`` and tag its rows with name-derived fields.

    The file stem (the name without its extension) is split on ``_``. The
    first piece is stored in a ``date`` column and the second in an ``hour``
    column, both as the raw strings taken from the name; any further pieces
    are ignored. The ``address`` column is removed when present.

    Args:
        json_filename: Bare file name (no directory part) of a JSON file
            located inside the ``geojson`` directory.

    Returns:
        A DataFrame holding the file's records plus ``date`` and ``hour``
        columns, without an ``address`` column.

    Raises:
        IndexError: If the file stem contains no ``_`` separator.
    """
    df_json = pd.read_json(os.path.join("geojson", json_filename))

    filename_parts = os.path.splitext(json_filename)[0].split("_")
    df_json["date"] = filename_parts[0]
    df_json["hour"] = filename_parts[1]

    # errors="ignore": a file with no records (or records lacking the field)
    # produces a frame without an "address" column; that should not abort the
    # whole batch with a KeyError.
    return df_json.drop(columns="address", errors="ignore")
# Lazily turn each listed file into a frame, then stack them into one table
# with a fresh 0..n-1 index.
dfs = map(read_properties, jsons)
df = pd.concat(dfs, ignore_index=True)
print(df.shape)

# Discard rows that are exact duplicates across every column and report the
# resulting size for comparison with the shape printed above.
clean_df = df.drop_duplicates()
print(clean_df.shape)

# Write the de-duplicated table to disk, leaving the index out of the file.
clean_df.to_parquet("2023.parquet", index=False)
print("finish")