"""Merge the JSON files found in ``geojson/`` into one de-duplicated Parquet file."""
import os

import pandas as pd

INPUT_DIR = "geojson"
OUTPUT_PATH = "2023.parquet"


def read_properties(json_filename, directory=INPUT_DIR):
    """Load one JSON file into a DataFrame tagged with its filename parts.

    The file name (without extension) is split on ``"_"``; the first part is
    stored in a ``date`` column and the second in an ``hour`` column, both as
    strings. An ``address`` column is dropped when present.

    Args:
        json_filename: Name of the file inside *directory*.
        directory: Folder containing the file. Defaults to ``"geojson"``.

    Returns:
        The loaded DataFrame with ``date`` and ``hour`` added and ``address``
        removed.

    Raises:
        IndexError: If the file name stem contains no ``"_"`` separator.
    """
    frame = pd.read_json(os.path.join(directory, json_filename))
    filename_parts = os.path.splitext(json_filename)[0].split("_")
    frame["date"] = filename_parts[0]
    frame["hour"] = filename_parts[1]
    # errors="ignore": a file without an "address" column should not abort
    # the whole run, since the end result (no such column) is the same.
    return frame.drop(columns="address", errors="ignore")


def _list_input_files(directory):
    """Return the names of regular, non-hidden files in *directory*, sorted."""
    # os.listdir gives arbitrary order and also returns sub-directories and
    # dotfiles (e.g. .DS_Store); sort for reproducible output and skip
    # entries that cannot be data files.
    return sorted(
        name
        for name in os.listdir(directory)
        if not name.startswith(".")
        and os.path.isfile(os.path.join(directory, name))
    )


def main():
    """Read every input file, drop duplicate rows and write the Parquet file."""
    jsons = _list_input_files(INPUT_DIR)
    if not jsons:
        # pd.concat would raise the same exception type with a vaguer message.
        raise ValueError(f"no input files found in {INPUT_DIR!r}")

    # A generator keeps only one per-file frame alive until concat consumes it.
    df = pd.concat((read_properties(j) for j in jsons), ignore_index=True)
    print(df.shape)

    clean_df = df.drop_duplicates()
    print(clean_df.shape)

    clean_df.to_parquet(OUTPUT_PATH, index=False)
    print("finish")


if __name__ == "__main__":
    main()