From 03d631b9e76e07e6f10e0b490e9c3739f91a795b Mon Sep 17 00:00:00 2001
From: ext-neveu_n <nneveu@hawk.iit.edu>
Date: Wed, 23 May 2018 15:23:36 -0500
Subject: [PATCH] adding function to read mldb databases

---
 opal/analysis/pareto_fronts.py | 138 +++++++++++++++++++++++----------
 1 file changed, 99 insertions(+), 39 deletions(-)

diff --git a/opal/analysis/pareto_fronts.py b/opal/analysis/pareto_fronts.py
index b76e376..568a9cf 100644
--- a/opal/analysis/pareto_fronts.py
+++ b/opal/analysis/pareto_fronts.py
@@ -1,44 +1,15 @@
 # Author:   Nicole Neveu 
 # Date:     May 2018
 
-from opal.datasets.filetype import FileType
-from opal.statistics import statistics as stat
-from opal.datasets.DatasetBase import DatasetBase
-from opal.analysis import impl_beam
-import numpy as np
 
-import json
-import pylab as pl
+import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
-from matplotlib.widgets import Slider, Button, RadioButtons
-
-from collections import OrderedDict
-from optPilot.Annotate import AnnoteFinder
-import pyOPALTools.optPilot.OptPilotJsonReader as jsonreader
-
-
-def scaleData(vals):
-    """
-    Scale 1D data array from 0 to 1.
-    Used to compare objectives with different units.
-
-    Parameters
-    ----------
-    vals    (numpy array)   1D array that holds any opal data
-
-    Returns
-    -------
-    sacaled_vals    (numpy array)   1D array scaled from 0 to 1
-    """
-    smax = np.max(vals)
-    smin = np.min(vals)
-    scaled_vals = (vals - smin)/smax
-    return (scaled_vals)
+from opal.datasets.filetype import FileType
+from db import mldb
 
 def pareto(x, y, dvars=0):
     """
-    Find Pareto points for 2 objectives given
+    Find Pareto points for 2 objectives, given
     all data recorded by optimization run. 
     These points are calculated independent
     of generation. i.e. best points from all 
@@ -46,13 +17,28 @@ def pareto(x, y, dvars=0):
 
     Parameters 
     ----------
-    x   (numpy array)   array of first objective values
-    y   (numpy array)   array of second objective values
+    x   (numpy array)   1D array of first objective values
+    y   (numpy array)   1D array of second objective values
     
     Optionals
     ---------
-    dvars
+    dvars   (numpy array)   ND array of design variables 
+
+    Returns
+    -------
+    pfdict (dictionary) Dictionary that holds pareto front
+                        values and corresponding design values
     """
+    #Check data is correct length
+    lx = len(x)
+    ly = len(y)
+    ld = len(dvars[:,0])
+    if lx==ly==ld:
+        pass
+    else: 
+        print('Input data sizes do not match\n')
+        print('Please check input arrays')
+    
     #Making holders for my pareto fronts            
     pareto_y = []
     pareto_x = []
@@ -60,6 +46,7 @@ def pareto(x, y, dvars=0):
     w  = np.arange(0,1.001, 0.001)
     sx = scaleData(x)
     sy = scaleData(y)
+    
     #Finding best point with respect to all weights (w)
     for i in range(0, len(w)):
         fobj     = sy * w[i] + sx *(1-w[i])
@@ -69,11 +56,84 @@ def pareto(x, y, dvars=0):
 
     pareto_pts = delete_repeats(pareto_x, pareto_y)
     ind        = np.array(pareto_pts.index.tolist())
-    pdvar      = dvars[ind, :]
+
+    #Check dvars is correct length
+    if dvars!=0:
+        pdvar      = dvars[ind, :]
 
     return(pareto_pts.ix[:,0], pareto_pts.ix[:,1], pdvar) #pareto_x, pareto_y, pdvar)
 
-def delete_repeats(x, y): #, z):
-    df = pd.DataFrame({'x':x, 'y':y}) #, 'z':z})
+
+def get_all_data_db(dbpath):
+    """
+    Get all objectives and design variables
+    from every generation in an optimization
+    database. Databases are made using OPAL 
+    output from json files or stat files. 
+    Functions to make databases can be found
+    in mldb.py. 
+    
+    Parameters 
+    ----------
+    dbpath  (str)   path to pickle file containing
+                    database made with mldb.py
+
+    Returns
+    -------
+    data    (dict)  Dictionary containing all
+                    objectives and design values
+                    in optimization database.
+    """
+    data = {}
+    dbr = mldb.mldb()
+    dbr.load(dbpath)
+    #dvars  = dbr.getXNames()
+    #obj    = dbr.getYNames()
+    gens   = dbr.getNumberOfSamples()
+
+    
+    return(data)
+
+
+
+
+def scaleData(vals):
+    """
+    Scale 1D data array from 0 to 1.
+    Used to compare objectives with different units.
+
+    Parameters
+    ----------
+    vals    (numpy array)   1D array that holds any opal data
+
+    Returns
+    -------
+    scaled_vals     (numpy array)   1D array scaled from 0 to 1
+    """
+    smax = np.max(vals)
+    smin = np.min(vals)
+    scaled_vals = (vals - smin)/smax
+    return (scaled_vals)
+
+
+def delete_repeats(x, y, z=0):
+    """
+    Delete repeated pareto front values, if any.
+    
+    Parameters
+    ----------
+    x   (numpy array)   1D array of first objective values
+    y   (numpy array)   1D array of second objective values
+   
+    Optionals
+    ---------
+    z   (numpy array)   ND array of design variables
+
+    """
+    if z==0:
+        df = pd.DataFrame({'x':x, 'y':y}) #, 'z':z})
+    else:
+        df = pd.DataFrame({'x':x, 'y':y, 'z':z})
+    
     return df.drop_duplicates(subset=['x', 'y'], keep='first')
 
-- 
GitLab