You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
26 lines
622 B
26 lines
622 B
import os
|
|
|
|
import pandas as pd
|
|
|
|
# from tqdm import tqdm
|
|
|
|
# Names of the snapshot files to load from the "geojson" directory.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined output reproducible from run to run.
jsons = sorted(os.listdir("geojson"))
|
|
|
|
|
|
def read_properties(json_filename, directory="geojson"):
    """Load one JSON snapshot file into a DataFrame tagged with date and hour.

    The file name (without extension) is split on "_": the first part is
    stored in a "date" column and the second in an "hour" column, both as
    strings. The "address" column is removed from the result.

    Args:
        json_filename: File name inside ``directory``; its stem must contain
            at least two "_"-separated parts.
        directory: Directory that holds the file. Defaults to "geojson".

    Returns:
        A new DataFrame with the file's records, plus "date" and "hour"
        columns, and without the "address" column.

    Raises:
        IndexError: If the file name stem has no "_" separator.
        KeyError: If the loaded data has no "address" column.
    """
    df_json = pd.read_json(os.path.join(directory, json_filename))

    filename_parts = os.path.splitext(json_filename)[0].split("_")
    df_json["date"] = filename_parts[0]
    df_json["hour"] = filename_parts[1]

    # Return a new frame instead of mutating in place.
    return df_json.drop(columns="address")
|
|
|
|
|
|
# Fail with a clear message when there is nothing to load; pd.concat would
# otherwise raise "No objects to concatenate" on an empty input.
if not jsons:
    raise SystemExit("no files found in 'geojson'; nothing to write")

# Load each snapshot on demand and stack them into one frame with a fresh
# 0..n-1 index.
dfs = (read_properties(j) for j in jsons)
df = pd.concat(dfs, ignore_index=True)
print(df.shape)

# Drop rows that are identical across every column, then report the new size.
clean_df = df.drop_duplicates()
print(clean_df.shape)

# Persist the de-duplicated data without writing the index.
clean_df.to_parquet("2023.parquet", index=False)
print("finish")
|