ml_func_3+add_filter

dev
AlexP077 3 years ago committed by Dmitry Titov
parent 0ab7278a60
commit 7e016078f0

@ -7,4 +7,4 @@ per-file-ignores =
service/migrations/*:E501 service/migrations/*:E501
service/views.py:C901 service/views.py:C901
service/models.py:F403,F401 service/models.py:F403,F401
service/tasks.py:E712 service/tasks.py:E712,C901

@ -162,6 +162,19 @@ REST_REGISTRATION = {
'VERIFICATION_FROM_EMAIL': 'noreply@spatiality.website', 'VERIFICATION_FROM_EMAIL': 'noreply@spatiality.website',
'USER_LOGIN_FIELDS': ['email'], 'USER_LOGIN_FIELDS': ['email'],
} }
SWAGGER_SETTINGS = {
'DEFAULT_INFO': 'service.urls.info',
'USE_SESSION_AUTH': False,
'SECURITY_DEFINITIONS': {
'basic': {
'type': 'basic',
},
},
'SWAGGER_PATH': 'django_static/swagger/swagger.yaml',
}
SRID = 4326 SRID = 4326
# celery config # celery config

@ -1,11 +1,16 @@
import os import os
import catboost import catboost
import geopandas as gpd
import numpy as np
import pandas as pd import pandas as pd
import psycopg2 import psycopg2
import sqlalchemy import sqlalchemy
from celery import shared_task from celery import shared_task
from django.db.models import F from django.db.models import F
from scipy import interpolate
from scipy.spatial import distance
from shapely import wkb
from sklearn import metrics from sklearn import metrics
from sklearn import model_selection as ms from sklearn import model_selection as ms
from sqlalchemy import text from sqlalchemy import text
@ -15,12 +20,6 @@ from postamates.settings import DB_URL
from service.models import PlacementPoint from service.models import PlacementPoint
# Запустить worker
# celery -A postamates worker -l info
# Запустить scheduler
# celery -A postamates beat -l INFO.
@shared_task() @shared_task()
def raschet(): def raschet():
conn = sqlalchemy.create_engine( conn = sqlalchemy.create_engine(
@ -30,8 +29,16 @@ def raschet():
query = text('select * from service_placementpoint') query = text('select * from service_placementpoint')
connection = conn.connect() connection = conn.connect()
pts = pd.read_sql(query, connection) pts = pd.read_sql(query, connection)
pts.loc[pts.target_dist > 700, 'target_dist'] = 700 pts['geometry'] = pts['geometry'].apply(wkb.loads, hex=True)
pts = pts.sort_values(by='id').reset_index(drop=True) pts = gpd.GeoDataFrame(pts, geometry='geometry', crs='epsg:4326')
pts = pts.to_crs('epsg:32637')
pts = pts.rename(
columns={
'target_cnt_nearby_mean': 'target_dist1',
'target_age_nearby_mean': 'target_dist2',
'yndxfood_cnt_cst': 'target_dist3',
},
)
feats = [ feats = [
'id', 'metro_dist', 'target_dist', 'property_price_bargains', 'property_price_offers', 'id', 'metro_dist', 'target_dist', 'property_price_bargains', 'property_price_offers',
@ -41,13 +48,53 @@ def raschet():
'pharmacy_cnt', 'supermarket_cnt', 'supermarket_premium_cnt', 'clinic_cnt', 'bank_cnt', 'reca_cnt', 'pharmacy_cnt', 'supermarket_cnt', 'supermarket_premium_cnt', 'clinic_cnt', 'bank_cnt', 'reca_cnt',
'lab_cnt', 'culture_cnt', 'attraction_cnt', 'mfc_cnt', 'bc_cnt', 'tc_cnt', 'rival_pvz_cnt', 'lab_cnt', 'culture_cnt', 'attraction_cnt', 'mfc_cnt', 'bc_cnt', 'tc_cnt', 'rival_pvz_cnt',
'rival_post_cnt', 'rival_post_cnt',
'business_activity', 'age_day', 'target_age_nearby_mean', 'target_cnt_ao_mean', 'business_activity', 'age_day', 'target_cnt_ao_mean', 'target_dist1', 'target_dist2', 'target_dist3',
# 'target_cnt_nearby_mean'
] ]
# Записи для обучения # Записи для обучения
pts_trn = pts.loc[pts.sample_trn == True].reset_index(drop=True) pts_trn = pts.loc[pts.sample_trn == True].reset_index(drop=True)
# pts_trn = pts_trn.loc[pts_trn.fact < 450].reset_index(drop=True) pts_trn = gpd.GeoDataFrame(pts_trn, geometry='geometry', crs='epsg:32637')
pts_target = pts_trn[['geometry']]
pts_target['cnt'] = 1
pts_target = gpd.GeoDataFrame(pts_target, geometry='geometry', crs='epsg:32637')
target_feature_coords = []
for i in range(0, len(pts_target)):
target_feature_coords.append((pts_target.geometry.x[i], pts_target.geometry.y[i]))
target_feature_coords = np.array(target_feature_coords)
pts_trn['target_dist'] = pts_trn.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[1])),
axis=1,
)
pts_trn.loc[pts_trn.target_dist > 700, 'target_dist'] = 700
pts_trn['target_dist1'] = pts_trn.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[2])),
axis=1,
)
pts_trn.loc[pts_trn.target_dist1 > 700, 'target_dist1'] = 700
pts_trn['target_dist2'] = pts_trn.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[3])),
axis=1,
)
pts_trn.loc[pts_trn.target_dist2 > 700, 'target_dist2'] = 700
pts_trn['target_dist3'] = pts_trn.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[4])),
axis=1,
)
pts_trn.loc[pts_trn.target_dist3 > 700, 'target_dist3'] = 700
pts_trn['buf'] = pts_trn.buffer(500)
pts_trn = gpd.GeoDataFrame(pts_trn, geometry='buf', crs='epsg:32637')
target_post = gpd.sjoin(pts_trn, pts_target, op='contains').groupby('id', as_index=False).agg({'cnt': 'count'})
target_post = target_post.rename(columns={'cnt': 'target_post_cnt'})
pts_trn = pts_trn.drop(columns=['target_post_cnt'])
pts_trn = pts_trn.join(target_post.set_index('id'), on='id')
pts_trn['target_post_cnt'] = pts_trn['target_post_cnt'] - 1
pts_trn = pts_trn.sort_values(by='id').reset_index(drop=True)
X_trn = pts_trn[feats].drop(columns=['id']) X_trn = pts_trn[feats].drop(columns=['id'])
Y_trn = pts_trn[['fact']] Y_trn = pts_trn[['fact']]
@ -56,9 +103,56 @@ def raschet():
(pts.status == 'Installation') | (pts.status == 'Installation') |
(pts.status == 'Cancelled') | (pts.status == 'Cancelled') |
((pts.status == 'Working') & (pts.sample_trn == False))].reset_index(drop=True) ((pts.status == 'Working') & (pts.sample_trn == False))].reset_index(drop=True)
pts_inf = gpd.GeoDataFrame(pts_inf, geometry='geometry', crs='epsg:32637')
pts_inf['buf'] = pts_inf.buffer(500)
pts_inf = gpd.GeoDataFrame(pts_inf, geometry='buf', crs='epsg:32637')
pts_target = pts.loc[(pts.status == 'Working') |
(pts.status == 'Installation') |
(pts.sample_trn == True)].reset_index(drop=True)
pts_target = pts_target[['geometry']]
pts_target['cnt'] = 1
pts_target = gpd.GeoDataFrame(pts_target, geometry='geometry', crs='epsg:32637')
target_feature_coords = []
for i in range(0, len(pts_target)):
target_feature_coords.append((pts_target.geometry.x[i], pts_target.geometry.y[i]))
target_feature_coords = np.array(target_feature_coords)
pts_inf['target_dist'] = pts_inf.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[0])),
axis=1,
)
pts_inf.loc[pts_inf.target_dist > 700, 'target_dist'] = 700
pts_inf['target_dist1'] = pts_inf.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[1])),
axis=1,
)
pts_inf.loc[pts_inf.target_dist1 > 700, 'target_dist1'] = 700
pts_inf['target_dist2'] = pts_inf.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[2])),
axis=1,
)
pts_inf.loc[pts_inf.target_dist2 > 700, 'target_dist2'] = 700
pts_inf['target_dist3'] = pts_inf.apply(
lambda x: ((sorted(distance.cdist([[x['geometry'].x, x['geometry'].y]], target_feature_coords)[0])[3])),
axis=1,
)
pts_inf.loc[pts_inf.target_dist3 > 700, 'target_dist3'] = 700
pts_inf = pts_inf.sort_values(by='id').reset_index(drop=True)
target_post = gpd.sjoin(pts_inf, pts_target, op='contains').groupby('id', as_index=False).agg({'cnt': 'count'})
target_post = target_post.rename(columns={'cnt': 'target_post_cnt'})
pts_inf = pts_inf.drop(columns=['target_post_cnt'])
pts_inf = pts_inf.join(target_post.set_index('id'), on='id')
pts_inf['age_day_init'] = pts_inf['age_day']
pts_inf['age_day'] = 240 pts_inf['age_day'] = 240
X_inf = pts_inf[feats] X_inf = pts_inf[feats]
seeds = [39, 85, 15, 1, 59]
seeds = [3, 99, 87, 21, 15]
# Обучение, инференс # Обучение, инференс
r2_scores = [] r2_scores = []
@ -79,12 +173,17 @@ def raschet():
current_pred = sum(y_infers) / 5 current_pred = sum(y_infers) / 5
# Обновление полей по результатам работы модели # Обновление полей по результатам работы модели
update_fields = pts_inf[['id', 'delta_current', 'delta_first', 'plan_current', 'plan_first', 'prediction_first']] update_fields = pts_inf[
[
'id', 'age_day_init', 'status', 'fact', 'delta_current', 'delta_first', 'plan_current', 'plan_first',
'prediction_first',
]
]
update_fields = update_fields.join( update_fields = update_fields.join(
pd.concat( pd.concat(
[ [
X_inf[['id']], X_inf[['id']],
pd.DataFrame([{'prediction_current': current_pred}]), pd.DataFrame({'prediction_current': current_pred}),
], ],
axis=1, axis=1,
).set_index('id'), ).set_index('id'),
@ -92,6 +191,30 @@ def raschet():
) )
update_fields['prediction_current'] = update_fields['prediction_current'].astype(int) update_fields['prediction_current'] = update_fields['prediction_current'].astype(int)
days_x = np.array([0, 30, 60, 90, 120, 150, 180, 210, 240, 270])
perc_y = np.array([0, 0.15, 0.20, 0.30, 0.60, 0.70, 0.70, 0.75, 0.75, 0.80])
spl = interpolate.splrep(days_x, perc_y)
update_fields['plan_first'] = update_fields.apply(
lambda x: (x.prediction_first * interpolate.splev(x.age_day_init, spl) if x.status == 'Working' else 0),
axis=1,
)
update_fields['plan_current'] = update_fields.apply(
lambda x: (x.prediction_current * interpolate.splev(x.age_day_init, spl) if x.status == 'Working' else 0),
axis=1,
)
update_fields['delta_first'] = update_fields.apply(
lambda x: ((x.fact - x.plan_first) / x.plan_first * 100 if x.status == 'Working' else 0),
axis=1,
)
update_fields['delta_current'] = update_fields.apply(
lambda x: ((x.fact - x.plan_current) / x.plan_current * 100 if x.status == 'Working' else 0),
axis=1,
)
update_fields_working = update_fields.loc[update_fields.status == 'Working'].reset_index(drop=True)
update_fields_working = update_fields_working.fillna(0)
# Загрузка в базу обновленных значений # Загрузка в базу обновленных значений
conn2 = psycopg2.connect( conn2 = psycopg2.connect(
database=os.getenv('POSTGRES_DB', 'postgres'), user=os.getenv('POSTGRES_USER', 'postgres'), database=os.getenv('POSTGRES_DB', 'postgres'), user=os.getenv('POSTGRES_USER', 'postgres'),
@ -100,17 +223,70 @@ def raschet():
options='-c search_path=public', options='-c search_path=public',
) )
cursor = conn2.cursor() cursor = conn2.cursor()
# prediction_current
update_records1 = [] update_records1 = []
for i in range(0, len(update_fields)): for i in range(0, len(update_fields)):
update_records1.append((int(update_fields.prediction_current[i]), int(update_fields.id[i]))) update_records1.append((int(update_fields.prediction_current[i]), int(update_fields.id[i])))
sql_update_query = """Update service_placementpoint set prediction_current = %s where id = %s""" sql_update_query = """Update service_placementpoint set prediction_current = %s where id = %s"""
try: try:
cursor.executemany(sql_update_query, update_records1) psycopg2.extras.execute_batch(cursor, sql_update_query, update_records1)
conn2.commit()
except Exception:
cursor.execute('ROLLBACK')
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records1)
conn2.commit()
# plan_first
update_records2 = []
for i in range(0, len(update_fields_working)):
update_records2.append((int(update_fields_working.plan_first[i]), int(update_fields_working.id[i])))
sql_update_query = """Update service_placementpoint set plan_first = %s where id = %s"""
try:
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records2)
conn2.commit()
except Exception:
cursor.execute('ROLLBACK')
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records2)
conn2.commit()
# plan_current
update_records3 = []
for i in range(0, len(update_fields_working)):
update_records3.append((int(update_fields_working.plan_current[i]), int(update_fields_working.id[i])))
sql_update_query = """Update service_placementpoint set plan_current = %s where id = %s"""
try:
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records3)
conn2.commit()
except Exception:
cursor.execute('ROLLBACK')
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records3)
conn2.commit()
# delta_first
update_records4 = []
for i in range(0, len(update_fields_working)):
update_records4.append((int(update_fields_working.delta_first[i]), int(update_fields_working.id[i])))
sql_update_query = """Update service_placementpoint set delta_first = %s where id = %s"""
try:
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records4)
conn2.commit()
except Exception:
cursor.execute('ROLLBACK')
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records4)
conn2.commit()
# delta_current
update_records5 = []
for i in range(0, len(update_fields_working)):
update_records5.append((int(update_fields_working.delta_current[i]), int(update_fields_working.id[i])))
sql_update_query = """Update service_placementpoint set delta_current = %s where id = %s"""
try:
psycopg2.extras.execute_batch(cursor, sql_update_query, update_records5)
conn2.commit() conn2.commit()
except Exception: except Exception:
cursor.execute('ROLLBACK') cursor.execute('ROLLBACK')
cursor.executemany(sql_update_query, update_records1) psycopg2.extras.execute_batch(cursor, sql_update_query, update_records5)
conn2.commit() conn2.commit()

@ -12,16 +12,16 @@ from service import views
router = routers.DefaultRouter() router = routers.DefaultRouter()
router.register('', views.PlacementPointViewSet) router.register('', views.PlacementPointViewSet)
info = openapi.Info(
title='Snippets API',
default_version='v1',
description='Test description',
terms_of_service='https://www.google.com/policies/terms/',
contact=openapi.Contact(email='contact@snippets.local'),
license=openapi.License(name='BSD License'),
)
schema_view = get_schema_view( schema_view = get_schema_view(
openapi.Info( info,
title='Snippets API',
default_version='v1',
description='Test description',
terms_of_service='https://www.google.com/policies/terms/',
contact=openapi.Contact(email='contact@snippets.local'),
license=openapi.License(name='BSD License'),
),
url='https://postamates.spatiality.website/', url='https://postamates.spatiality.website/',
public=True, public=True,
permission_classes=[permissions.AllowAny], permission_classes=[permissions.AllowAny],

@ -52,6 +52,7 @@ class PlacementPointViewSet(ReadOnlyModelViewSet):
excluded = self.request.GET.get('excluded[]') excluded = self.request.GET.get('excluded[]')
plan_first = self.request.GET.get('plan_first[]') plan_first = self.request.GET.get('plan_first[]')
plan_current = self.request.GET.get('plan_current[]') plan_current = self.request.GET.get('plan_current[]')
delta_first = self.request.GET.get('delta_first[]')
delta_current = self.request.GET.get('delta_current[]') delta_current = self.request.GET.get('delta_current[]')
rayons = self.request.GET.get('area[]') rayons = self.request.GET.get('area[]')
aos = self.request.GET.get('district[]') aos = self.request.GET.get('district[]')
@ -88,6 +89,9 @@ class PlacementPointViewSet(ReadOnlyModelViewSet):
if delta_current: if delta_current:
delta_current = list(delta_current.split(',')) delta_current = list(delta_current.split(','))
qs = qs.filter(delta_current__range=delta_current) qs = qs.filter(delta_current__range=delta_current)
if delta_first:
delta_first = list(delta_first.split(','))
qs = qs.filter(delta_first__range=delta_first)
if rayons: if rayons:
rayons = list(rayons.split(',')) rayons = list(rayons.split(','))
qs = qs.filter(area_id__in=rayons) qs = qs.filter(area_id__in=rayons)

Loading…
Cancel
Save