33import os
44import sys
55import time
6- import numpy as np
6+ import pandas as pd
77
88import peewee as pw
99from playhouse .postgres_ext import ArrayField , BinaryJSONField
@@ -73,6 +73,7 @@ class File(BaseModel):
7373 name = pw .CharField (null = True )
7474 created = pw .DateTimeField (default = datetime .datetime .now )
7575
76+
7677@signals .post_delete (sender = File )
7778def remove_file_after_delete (sender , instance ):
7879 try :
@@ -135,6 +136,7 @@ class Meta:
135136 (('dataset' , 'file' ), True ),
136137 )
137138
139+
138140@signals .pre_delete (sender = Dataset )
139141def remove_related_files (sender , instance ):
140142 for f in instance .files :
@@ -148,7 +150,7 @@ class Featureset(BaseModel):
148150 name = pw .CharField ()
149151 created = pw .DateTimeField (default = datetime .datetime .now )
150152 features_list = ArrayField (pw .CharField )
151- custom_features_script = pw .CharField (null = True ) # move to fset file?
153+ custom_features_script = pw .CharField (null = True ) # move to fset file?
152154 file = pw .ForeignKeyField (File , on_delete = 'CASCADE' )
153155 task_id = pw .CharField (null = True )
154156 finished = pw .DateTimeField (null = True )
@@ -194,16 +196,15 @@ def is_owned_by(self, username):
194196 def format_pred_data (fset , data ):
195197 fset .columns = fset .columns .droplevel ('channel' )
196198 fset .index = fset .index .astype (str ) # can't use ints as JSON keys
197- result = {}
198- for i , name in enumerate (fset .index ):
199- result [name ] = {'features' : fset .loc [name ].to_dict ()}
200- if 'labels' in data :
201- result [name ]['label' ] = data ['labels' ][i ]
202- if len (data ['pred_probs' ]) > 0 :
203- result [name ]['prediction' ] = dict (zip (data ['all_classes' ],
204- data ['pred_probs' ][i ]))
205- else :
206- result [name ]['prediction' ] = data ['preds' ][i ]
199+ labels = pd .Series (data .get ('labels' ), index = fset .index )
200+ if len (data .get ('pred_probs' , [])) > 0 :
201+ preds = pd .DataFrame (data .get ('pred_probs' , []),
202+ index = fset .index ).to_dict (orient = 'index' )
203+ else :
204+ preds = pd .Series (data ['preds' ], index = fset .index ).to_dict ()
205+ result = {name : {'features' : feats , 'label' : labels .loc [name ],
206+ 'prediction' : preds [name ]}
207+ for name , feats in fset .to_dict (orient = 'index' ).items ()}
207208 return result
208209
209210 def display_info (self ):
@@ -238,6 +239,7 @@ def create_tables(retry=5):
238239 print ('Could not connect to database...sleeping 5' )
239240 time .sleep (5 )
240241
242+
241243def drop_tables ():
242244 db .drop_tables (models , safe = True , cascade = True )
243245
0 commit comments