From 9fba7cd20f6c283cac64dc6247ef299e1b45b055 Mon Sep 17 00:00:00 2001 From: AlexP077 Date: Wed, 25 Oct 2023 20:29:25 +0300 Subject: [PATCH] fix_2510 --- docker-compose.yml | 2 +- service/service.py | 194 ++++++++++++++++++++++++--------------------- service/tasks.py | 28 ++++--- 3 files changed, 119 insertions(+), 105 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 3d08407..397a3f4 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -31,7 +31,7 @@ services: python manage.py collectstatic --noinput && python manage.py loaddata fixtures/groups.json && python manage.py loaddata fixtures/post_and_pvz.json && - python manage.py loaddata fixtures/post_and_pvz_groups.json && + python manage.py loaddata fixtures/post_pvz_groups.json && python manage.py loaddata fixtures/otherobjectscategorys.json && python manage.py loaddata fixtures/otherobjectsgroups.json && python manage.py runserver 0.0.0.0:${DJANGO_PORT}" diff --git a/service/service.py b/service/service.py index fae7844..0dd5f26 100644 --- a/service/service.py +++ b/service/service.py @@ -152,96 +152,107 @@ class PointService: dist__lt=Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS) ).count() point.target_cnt_ao_mean = qs[0].target_cnt_ao_mean - # point.rival_post_cnt = models.Post_and_pvz.objects.filter( - # category__name="Постаматы прочих сетей", include_in_ml=True, - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.rival_pvz_cnt = models.Post_and_pvz.objects.filter( - # category__name="ПВЗ", include_in_ml=True, - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.metro_dist = models.OtherObjects.objects.filter(group__name='metro_stations').annotate( - # dist=Dist('wkt', origin)).order_by('dist')[0].dist.m - # point.property_price_bargains = models.OtherObjects.objects.filter( - # group__name="bargains", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate(Avg('param1'))[ - # 'param1__avg'] - # offers_estate = models.OtherObjects.objects.filter( - # group__name="offers_estate", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( - # param1__avg=Avg('param1'), param3__avg=Avg('param3')) - # point.property_price_offers = offers_estate['param1__avg'] - # point.property_mean_floor = offers_estate['param3__avg'] - # point.property_era = models.OtherObjects.objects.filter( - # group__name="offers_estate").values('param2').annotate(cnt=Count('param2')).order_by('-cnt').first()[ - # 'param2'] - # point.flats_cnt = models.OtherObjects.objects.filter( - # group__name="flats_cnt", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( - # param1__sum=Sum('param1'))['param1__sum'] - # popul_home_job = models.OtherObjects.objects.filter( - # group__name="popul_home_job", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( - # param1__sum=Sum('param1'), param3__sum=Sum('param3')) - # point.popul_home = popul_home_job['param1__sum'] - # point.popul_job = popul_home_job['param3__sum'] - # yndx_food_cnt_amt = models.OtherObjects.objects.filter( - # group__name="yndx_food_cnt_amt", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( - # param1__sum=Sum('param1'), param3__sum=Sum('param3')) - # point.yndxfood_sum = yndx_food_cnt_amt['param1__sum'] - # point.yndxfood_cnt = yndx_food_cnt_amt['param3__sum'] - # point.school_cnt = models.OtherObjects.objects.filter( - # group__name="schools", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.kindergar_cnt = models.OtherObjects.objects.filter( - # group__name="kindergar", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.public_stop_cnt = models.OtherObjects.objects.filter( - # group__name="stops", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.sport_center_cnt = models.OtherObjects.objects.filter( - # group__name="sport_centers", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.pharmacy_cnt = models.OtherObjects.objects.filter( - # group__name="pharmacies", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.supermarket_cnt = models.OtherObjects.objects.filter( - # group__name="supermarkets", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.supermarket_premium_cnt = models.OtherObjects.objects.filter( - # group__name="supermarkets_premium", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.clinic_cnt = models.OtherObjects.objects.filter( - # group__name="clinics", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.bank_cnt = models.OtherObjects.objects.filter( - # group__name="banks", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.reca_cnt = models.OtherObjects.objects.filter( - # group__name="recas", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.lab_cnt = models.OtherObjects.objects.filter( - # group__name="labs", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.culture_cnt = models.OtherObjects.objects.filter( - # group__name="cultures", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.attraction_cnt = models.OtherObjects.objects.filter( - # group__name="attractions", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.mfc_cnt = models.OtherObjects.objects.filter( - # group__name="public_services", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.bc_cnt = models.OtherObjects.objects.filter( - # group__name="BC", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.tc_cnt = models.OtherObjects.objects.filter( - # group__name="TC", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() - # point.business_activity = models.OtherObjects.objects.filter( - # group__name="business_activity", - # wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( - # param1__sum=Sum('param1'))['param1__sum'] - # point.age_day = AGE_DAY_LIMIT + point.rival_post_cnt = models.Post_and_pvz.objects.filter( + category__name="Постаматы прочих сетей", include_in_ml=True, + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.rival_pvz_cnt = models.Post_and_pvz.objects.filter( + category__name="ПВЗ", include_in_ml=True, + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + metro = models.OtherObjects.objects.filter(group__name='metro_stations').annotate( + dist=Dist('wkt', origin)).order_by('dist') + if metro: + point.metro_dist = metro[0].dist.m + bargains = models.OtherObjects.objects.filter( + group__name="bargains", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate(Avg('param1')) + if bargains: + point.property_price_bargains = bargains['param1__avg'] + offers_estate = models.OtherObjects.objects.filter( + group__name="offers_estate", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( + param1__avg=Avg('param1'), param3__avg=Avg('param3')) + if offers_estate: + point.property_price_offers = offers_estate['param1__avg'] + point.property_mean_floor = offers_estate['param3__avg'] + offers_estate = models.OtherObjects.objects.filter( + group__name="offers_estate").values('param2').annotate(cnt=Count('param2')).order_by('-cnt').first() + if offers_estate: + point.property_era = offers_estate['param2'] + flats_cnt = models.OtherObjects.objects.filter( + group__name="flats_cnt", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( + param1__sum=Sum('param1')) + if flats_cnt: + point.flats_cnt = flats_cnt['param1__sum'] + popul_home_job = models.OtherObjects.objects.filter( + group__name="popul_home_job", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( + param1__sum=Sum('param1'), param3__sum=Sum('param3')) + if popul_home_job: + point.popul_home = popul_home_job['param1__sum'] + point.popul_job = popul_home_job['param3__sum'] + yndx_food_cnt_amt = models.OtherObjects.objects.filter( + group__name="yndx_food_cnt_amt", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( + param1__sum=Sum('param1'), param3__sum=Sum('param3')) + if yndx_food_cnt_amt: + point.yndxfood_sum = yndx_food_cnt_amt['param1__sum'] + point.yndxfood_cnt = yndx_food_cnt_amt['param3__sum'] + point.school_cnt = models.OtherObjects.objects.filter( + group__name="schools", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.kindergar_cnt = models.OtherObjects.objects.filter( + group__name="kindergar", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.public_stop_cnt = models.OtherObjects.objects.filter( + group__name="stops", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.sport_center_cnt = models.OtherObjects.objects.filter( + group__name="sport_centers", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.pharmacy_cnt = models.OtherObjects.objects.filter( + group__name="pharmacies", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.supermarket_cnt = models.OtherObjects.objects.filter( + group__name="supermarkets", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.supermarket_premium_cnt = models.OtherObjects.objects.filter( + group__name="supermarkets_premium", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.clinic_cnt = models.OtherObjects.objects.filter( + group__name="clinics", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.bank_cnt = models.OtherObjects.objects.filter( + group__name="banks", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.reca_cnt = models.OtherObjects.objects.filter( + group__name="recas", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.lab_cnt = models.OtherObjects.objects.filter( + group__name="labs", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.culture_cnt = models.OtherObjects.objects.filter( + group__name="cultures", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.attraction_cnt = models.OtherObjects.objects.filter( + group__name="attractions", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.mfc_cnt = models.OtherObjects.objects.filter( + group__name="public_services", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.bc_cnt = models.OtherObjects.objects.filter( + group__name="BC", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + point.tc_cnt = models.OtherObjects.objects.filter( + group__name="TC", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).count() + business_activity = models.OtherObjects.objects.filter( + group__name="business_activity", + wkt__distance_lt=(origin, Distance(m=DEFAULT_PLACEMENT_POINT_UPDATE_RADIUS))).aggregate( + param1__sum=Sum('param1')) + if business_activity: + point.business_activity = business_activity['param1__sum'] + point.age_day = AGE_DAY_LIMIT placement_point = models.PlacementPoint.objects.annotate( dist=Dist('geometry', origin)).order_by('dist') if placement_point: @@ -258,8 +269,7 @@ class PointService: @staticmethod def calculate_dist_for_group(point, group, instance_type=models.PlacementPointPVZDistance): - post_object = models.Post_and_pvz.objects.filter(group__name=group.name, - group__category=group.category).annotate( + post_object = models.Post_and_pvz.objects.filter(group__id=group.id).annotate( distance=Dist("wkt", point.geometry)).order_by('distance').first() d = instance_type.objects.filter(placement_point=point, pvz_postamates_group=group).first() diff --git a/service/tasks.py b/service/tasks.py index 935afe6..c1370f7 100644 --- a/service/tasks.py +++ b/service/tasks.py @@ -115,7 +115,6 @@ def raschet(table_name='service_placementpoint', need_time=True, task_name=STATU X_trn = pts_trn[feats].drop(columns=['id']) Y_trn = pts_trn[['fact']] - # status.status = 'Записи для инференса' # status.save() change_status('Записи для инференса', task_name=task_name) @@ -147,19 +146,23 @@ def raschet(table_name='service_placementpoint', need_time=True, task_name=STATU target_feature_coords = np.array(target_feature_coords) pts_inf['target_dist'] = pts_inf.apply( - lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[1]) if ((x.status == 'Working') or (x.status == 'Installation')) else + lambda x: ( + (sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[1]) if ( + (x.status == 'Working') or (x.status == 'Installation')) else (sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[0])), axis=1, ) pts_inf.loc[pts_inf.target_dist > 700, 'target_dist'] = 700 pts_inf = pts_inf.sort_values(by='id').reset_index(drop=True) - target_post = gpd.sjoin(pts_inf, pts_target, op='contains').groupby('id', as_index=False).agg({'cnt': 'count'}) + target_post = gpd.sjoin(pts_inf, pts_target, op='contains').groupby('id', as_index=False).agg( + {'cnt': 'count'}) target_post = target_post.rename(columns={'cnt': 'target_post_cnt'}) pts_inf = pts_inf.drop(columns=['target_post_cnt']) pts_inf = pts_inf.join(target_post.set_index('id'), on='id') pts_inf['target_post_cnt'] = pts_inf['target_post_cnt'].fillna(0) - pts_inf['target_post_cnt'] = pts_inf.apply(lambda x: ((x.target_post_cnt - 1) if ((x.status == 'Working') or (x.status == 'Installation')) else x.target_post_cnt), axis=1) + pts_inf['target_post_cnt'] = pts_inf.apply(lambda x: ((x.target_post_cnt - 1) if ( + (x.status == 'Working') or (x.status == 'Installation')) else x.target_post_cnt), axis=1) pts_inf['age_day_init'] = pts_inf['age_day'] pts_inf['age_day'] = 240 X_inf = pts_inf[feats] @@ -176,7 +179,8 @@ def raschet(table_name='service_placementpoint', need_time=True, task_name=STATU for i in seeds: # status.status = 'Обучение inference: ' + str(int((seeds.index(i) + 1) / len(seeds) * 100)) + '%' # status.save() - change_status(f'Обучение inference: {str(int((seeds.index(i) + 1) / len(seeds) * 100))}%', task_name=task_name) + change_status(f'Обучение inference: {str(int((seeds.index(i) + 1) / len(seeds) * 100))}%', + task_name=task_name) x_trn, x_test, y_trn, y_test = ms.train_test_split(X_trn, Y_trn, test_size=0.2, random_state=i) model = catboost.CatBoostRegressor(cat_features=['property_era'], random_state=i) model.fit(x_trn, y_trn, verbose=False) @@ -195,14 +199,15 @@ def raschet(table_name='service_placementpoint', need_time=True, task_name=STATU shap_values = explainer(X_inf.drop(columns=['id'])) shap_fields = pd.DataFrame(shap_values.values) shap_fields.columns = X_inf.drop(columns=['id']).columns + '_shap' - shap_fields = shap_fields.drop(columns = ['age_day_shap']) + shap_fields = shap_fields.drop(columns=['age_day_shap']) shap_fields['sum'] = abs(shap_fields).sum(axis=1) - shap_fields = round(shap_fields.iloc[:,:32].div(shap_fields['sum'], axis=0)*100, 2) + shap_fields = round(shap_fields.iloc[:, :32].div(shap_fields['sum'], axis=0) * 100, 2) # Обновление полей по результатам работы модели update_fields = pts_inf[ [ - 'id', 'age_day_init', 'status', 'fact', 'delta_current', 'delta_first', 'plan_current', 'plan_first', + 'id', 'age_day_init', 'status', 'fact', 'delta_current', 'delta_first', 'plan_current', + 'plan_first', 'prediction_first', 'target_post_cnt', 'target_dist' ] ] @@ -227,7 +232,8 @@ def raschet(table_name='service_placementpoint', need_time=True, task_name=STATU axis=1, ) update_fields['plan_current'] = update_fields.apply( - lambda x: (x.prediction_current * interpolate.splev(x.age_day_init, spl) if x.status == 'Working' else 0), + lambda x: ( + x.prediction_current * interpolate.splev(x.age_day_init, spl) if x.status == 'Working' else 0), axis=1, ) update_fields['delta_first'] = update_fields.apply( @@ -387,7 +393,6 @@ def raschet(table_name='service_placementpoint', need_time=True, task_name=STATU LastMLCall.objects.create() - @shared_task def load_post_and_pvz(obj_id: int): file = models.TempFiles.objects.get(id=obj_id) @@ -424,7 +429,7 @@ def load_post_and_pvz(obj_id: int): for _j, point in enumerate(points): status.status = "Подсчет расстояний: " + str(int((num_points * _i + _j) / total * 100)) + "%" status.save() - post_object = models.Post_and_pvz.objects.filter(group__name=group.name,group__category=group.category).annotate( + post_object = models.Post_and_pvz.objects.filter(group__id=group.id).annotate( distance=Distance("wkt", point.geometry)).order_by('distance').first() d = models.PlacementPointPVZDistance.objects.filter(placement_point=point, pvz_postamates_group=group).first() @@ -449,7 +454,6 @@ def load_post_and_pvz(obj_id: int): run_psql_command() - @shared_task() def add_age_day(): qs = PlacementPoint.objects.filter(status='Working')