UDFBase.h

Go to the documentation of this file.
00001 //# UDFBase.h: Abstract base class for a user-defined TaQL function
00002 //# Copyright (C) 2010
00003 //# Associated Universities, Inc. Washington DC, USA.
00004 //#
00005 //# This library is free software; you can redistribute it and/or modify it
00006 //# under the terms of the GNU Library General Public License as published by
00007 //# the Free Software Foundation; either version 2 of the License, or (at your
00008 //# option) any later version.
00009 //#
00010 //# This library is distributed in the hope that it will be useful, but WITHOUT
00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00012 //# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
00013 //# License for more details.
00014 //#
00015 //# You should have received a copy of the GNU Library General Public License
00016 //# along with this library; if not, write to the Free Software Foundation,
00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
00018 //#
00019 //# Correspondence concerning AIPS++ should be addressed as follows:
00020 //#        Internet email: aips2-request@nrao.edu.
00021 //#        Postal address: AIPS++ Project Office
00022 //#                        National Radio Astronomy Observatory
00023 //#                        520 Edgemont Road
00024 //#                        Charlottesville, VA 22903-2475 USA
00025 //#
00026 //# $Id: UDFBase.h 21262 2012-09-07 12:38:36Z gervandiepen $
00027 
00028 #ifndef TABLES_UDFBASE_H
00029 #define TABLES_UDFBASE_H
00030 
00031 //# Includes
00032 #include <casacore/casa/aips.h>
00033 #include <casacore/tables/TaQL/ExprNodeRep.h>
00034 #include <casacore/tables/Tables/Table.h>
00035 #include <casacore/tables/TaQL/TaQLStyle.h>
00036 #include <casacore/casa/Containers/Block.h>
00037 #include <casacore/casa/OS/Mutex.h>
00038 #include <casacore/casa/stdmap.h>
00039 
00040 
00041 namespace casacore {
00042 
00043   // <summary>
00044   // Abstract base class for a user-defined TaQL function
00045   // </summary>
00046   //
00047   // <synopsis>
00048   // This class makes it possible to add user-defined functions (UDF) to TaQL.
00049   // A UDF has to be implemented in a class derived from this class and can
00050   // contain one or more user-defined functions.
00051   // <br>A few functions have to be implemented in the class as described below.
00052   // In this way TaQL can be extended with arbitrary functions, which can be
00053   // normal functions as well as aggregate functions (often used with GROUPBY).
00054   //
00055   // A UDF is a class derived from this base class. It must contain the
00056   // following member functions. See also the example below.
00057   // <table border=0>
00058   // <tr>
00059   //  <td><src>makeObject</src></td>
00060   //  <td>a static function to create an object of the UDF class. This function
00061   //    needs to be registered.
00062   //  </td>
00063   // </tr>
00064   // <tr>
00065   //  <td><src>setup</src></td>
00066   //  <td>this virtual function is called after the object has been created.
00067   //   It should initialize the object using the function arguments that
00068   //   can be obtained using the function <src>operands()</src>. The setup
00069   //   function should perform the following:
00070   //   <ul>
00071   //    <li>Define the data type of the result using <src>setDataType</src>.
00072   //        The data type should be derived from the data types of the function
00073   //        arguments. The possible data types are defined in class
00074   //        TableExprNodeRep.
00075   //        Note that a UDF can support multiple data types. For example, a
00076   //        function like <src>min</src> can be used for Int, Double, or a mix.
00077   //        Function 'checkDT' in class TableExprNodeMulti can be used to
00078   //        check the data types of the operands and determine the result
00079   //        data type.
00080   //    <li>Define if the function is an aggregate function calculating
00081   //        an aggregated value in a group (e.g., minimum or mean).
00082   //        <src>setAggregate</src> can be used to tell so.
00083   //    <li>Define the dimensionality of the result using <src>setNDim</src>.
00084   //        A value of 0 means a scalar. A value of -1 means an array with
00085   //        a dimensionality that can vary from row to row.
00086   //    <li>Optionally use <src>setShape</src> to define the shape if the
00087   //        results are arrays with a shape that is the same for all rows.
00088   //        It will also set ndim if setNDim was not used yet, otherwise
00089   //        it checks if the ndim matches.
00090   //    <li>Optionally set the unit of the result using <src>setUnit</src>.
00091   //        TaQL has full support of units, so UDFs should behave the same.
00092   //        It is possible to change the unit of the function arguments.
00093   //        For example:
00094   //        <ul>
00095   //         <li>a function like 'sin' can force its argument to be
00096   //          in radians; TaQL will scale the argument as needed. This can be
00097   //          done like
00098   //          <src>TableExprNodeUnit::adaptUnit (operands()[i], "rad");</src>
00099   //         <li>A function like 'asin' will have a result in radians.
00100   //          Such a UDF should set its result unit to rad.
00101   //         <li>A function like 'min' wants its arguments to have the same
00102   //          unit and will set its result unit to it. It can be done like:
00103   //          <src>setUnit (TableExprFuncNode::makeEqualUnits
00104   //                        (operands(), 0, operands().size()));</src>
00105   //        </ul>
00106   //        See class TableExprFuncNode for more info about these functions.
00107   //    <li>Optionally define if the result is a constant value using
00108   //        <src>setConstant</src>. It means that the function is not
00109   //        dependent on the row number in the table being queried.
00110   //        This is usually the case if all UDF arguments are constant.
00111   //   </ul>
00112   //  </td>
00113   // </tr>
00114   // <tr>
00115   //  <td><src>getXXX</src></td>
00116   //  <td>these are virtual get functions for each possible data type. The
00117   //      get functions matching the data types set by the setup
00118   //      function need to be implemented.
00119   //      The <src>get</src> functions have an argument TableExprId
00120   //      defining the table row (or record) for which the function has
00121   //      to be evaluated. 
00122   //      If the UDF is an aggregate function, the TableExprId has to be
00123   //      cast to a TableExprIdAggr object from which all TableExprId
00124   //      objects in an aggregation group can be retrieved.
00125   //      <srcblock>
00126   //        const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
00127   //        const vector<TableExprId>& ids = aid.result().ids(id.rownr());
00128   //      </srcblock>
00129   //  </td>
00130   // </tr>
00131   // </table>
00132   //
00133   // A UDF has to be made known to TaQL by adding it to the UDF registry with
00134   // its name and 'makeObject' function.
00135   // UDFs will usually reside in a shared library that is loaded dynamically.
00136   // TaQL will load a UDF in the following way:
00137   // <ul>
00138   //  <li> The UDF name used in TaQL consists of two parts: a library name
00139   //       and a function name separated by a dot. Both parts need to be given.
00140   //       Note that the library name can also be seen as a UDF scope, so
00141   //       different UDFs with equal names can be used from different libraries.
00142   //       A UDF should be registered with this full name.
00143   //       <br>The "USING STYLE" clause can be used to define a synonym for
00144   //       a (long) library name in the TaQLStyle object. The library part
00145   //       of the UDF will always be looked up in this synonym map.
00146   //  <li> If a UDF is not found in the registry, TaQL will try to load
00147   //       a shared library using the library name part. The libraries it
00148   //       tries to load are lib<library>.so and libcasa_<library>.so.
00149   //       On Mac .dylib will be tried. If loaded successfully, a special
00150   //       function 'register_libname' will be called first. It should
00151   //       register each UDF in the shared library using UDFBase::registerUDF.
00152   // </ul>
00153   // </synopsis>
00154   //
00155   // <example>
00156   // The following example shows a normal UDF function.
00157   // <br>It returns True if the function argument matches 1.
00158   // It can be seen that it checks if the argument is an integer scalar.
00159   // <srcblock>
00160   // class TestUDF: public UDFBase
00161   // {
00162   // public:
00163   //   TestUDF() {}
00164   //   // Registered function to create the UDF object.
00165   //   // The name of the function is not important here.
00166   //   static UDFBase* makeObject (const String&)
00167   //     { return new TestUDF(); }
00168   //   // Setup and check the details; result is a bool scalar value.
00169   //   virtual void setup (const Table&, const TaQLStyle&)
00170   //   {
00171   //     AlwaysAssert (operands().size() == 1, AipsError);
00172   //     AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt,
00173   //                   AipsError);
00174   //     AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar,
00175   //                   AipsError);
00176   //     setDataType (TableExprNodeRep::NTBool);
00177   //     setNDim (0);                                 // scalar result
00178   //     setConstant (operands()[0]->isConstant());   // constant result?
00179   //   }
00180   //   // Get the value for the given id.
00181   //   // It gets the value of the operand and checks if it is 1.
00182   //   Bool getBool (const TableExprId& id)
00183   //     { return operands()[0]->getInt(id) == 1; }
00184   // };
00185   // </srcblock>
00186   // </example>
00187 
00188   // <example>
00189   // The following example shows an aggregate UDF function.
00190   // It calculates the sum of the cubes of the values in a group.
00191   // <srcblock>
00192   // class TestUDFAggr: public UDFBase
00193   // {
00194   // public:
00195   //   TestUDFAggr() {}
00196   //   // Registered function to create the UDF object.
00197   //   // The name of the function is not important here.
00198   //   static UDFBase* makeObject (const String&) { return new TestUDFAggr(); }
00199   //   // Setup and check the details; result is an integer scalar value.
00200   //   // It aggregates the values of multiple rows.
00201   //   virtual void setup (const Table&, const TaQLStyle&)
00202   //   {
00203   //     AlwaysAssert (operands().size() == 1, AipsError);
00204   //     AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt, AipsError);
00205   //     AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar, AipsError);
00206   //     setDataType (TableExprNodeRep::NTInt);
00207   //     setNDim (0);           // scalar
00208   //     setAggregate (True);   // aggregate function
00209   //   }
00210   //   // Get the value of a group.
00211   //   // It aggregates the values of multiple rows.
00212   //   Int64 getInt (const TableExprId& id)
00213   //   {
00214   //     // Cast the id to a TableExprIdAggr object.
00215   //     const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
00216   //     // Get the vector of ids for this group.
00217   //     const vector<TableExprId>& ids = aid.result().ids(id.rownr());
00218   //     // Get the values for all ids and accumulate them.
00219   //     Int64 sum3 = 0;
00220   //     for (vector<TableExprId>::const_iterator it=ids.begin();
00221   //          it!=ids.end(); ++it){
00222   //       Int64 v = operands()[0]->getInt(*it);
00223   //         sum3 += v*v*v;
00224   //     }
00225   //     return sum3;
00226   //   }
00227   // };
00228   // </srcblock>
00229   // </example>
00230   // More examples of UDF functions can be found in classes UDFMSCal
00231   // and DirectionUDF.
00232 
00233   class UDFBase
00234   {
00235   public:
00236     // The signature of a global or static member function creating an object
00237     // of the UDF.
00238     typedef UDFBase* MakeUDFObject (const String& functionName);
00239 
00240     // Only the default constructor is needed.
00241     UDFBase();
00242 
00243     // Destructor.
00244     virtual ~UDFBase();
00245 
00246     // Evaluate the function and return the result.
00247     // The default implementations throw a "not implemented" exception.
00248     // <group>
00249     virtual Bool      getBool     (const TableExprId& id);
00250     virtual Int64     getInt      (const TableExprId& id);
00251     virtual Double    getDouble   (const TableExprId& id);
00252     virtual DComplex  getDComplex (const TableExprId& id);
00253     virtual String    getString   (const TableExprId& id);
00254     virtual TaqlRegex getRegex    (const TableExprId& id);
00255     virtual MVTime    getDate     (const TableExprId& id);
00256     virtual MArray<Bool>     getArrayBool     (const TableExprId& id);
00257     virtual MArray<Int64>    getArrayInt      (const TableExprId& id);
00258     virtual MArray<Double>   getArrayDouble   (const TableExprId& id);
00259     virtual MArray<DComplex> getArrayDComplex (const TableExprId& id);
00260     virtual MArray<String>   getArrayString   (const TableExprId& id);
00261     virtual MArray<MVTime>   getArrayDate     (const TableExprId& id);
00262     // </group>
00263 
00264     // Get the unit of the result.
00265     const String& getUnit() const
00266       { return itsUnit; }
00267 
00268     // Get the nodes in the function operands representing an aggregate function.
00269     void getAggrNodes (vector<TableExprNodeRep*>& aggr);
00270 
00271     // Get the nodes in the function operands representing a table column.
00272     void getColumnNodes (vector<TableExprNodeRep*>& cols);
00273   
00274   private:
00275     // Set up the function object (to be implemented by the derived class).
00276     virtual void setup (const Table& table,
00277                         const TaQLStyle&) = 0;
00278 
00279   protected:
00280     // Get the operands (the function arguments).
00281     PtrBlock<TableExprNodeRep*>& operands()
00282       { return itsOperands; }
00283 
00284     // Set the data type.
00285     // This function must be called by the setup function of the derived class.
00286     void setDataType (TableExprNodeRep::NodeDataType);
00287 
00288     // Set the dimensionality of the results.
00289     // <br> 0 means that the results are scalars.
00290     // <br> -1 means that the results are arrays with unknown dimensionality.
00291     // <br> >0 means that the results are arrays with that dimensionality.
00292     // This function must be called by the setup function of the derived class.
00293     void setNDim (Int ndim);
00294 
00295     // Set the shape of the results if it is fixed and known.
00296     void setShape (const IPosition& shape);
00297 
00298     // Set the unit of the result.
00299     // If this function is not called by the setup function of the derived
00300     // class, the result has no unit.
00301     void setUnit (const String& unit);
00302 
00303     // Define if the result is constant (e.g. if all arguments are constant).
00304     // If this function is not called by the setup function of the derived
00305     // class, the result is not constant.
00306     void setConstant (Bool isConstant);
00307 
00308     // Define if the UDF is an aggregate function (usually used in GROUPBY).
00309     void setAggregate (Bool isAggregate);
00310 
00311     // Let a derived class recreate its column objects in case a selection
00312     // has to be applied.
00313     // The default implementation does nothing.
00314     virtual void recreateColumnObjects (const Vector<uInt>& rownrs);
00315 
00316   public:
00317     // Register the name and construction function of a UDF (thread-safe).
00318     // An exception is thrown if this name already exists with a different
00319     // construction function.
00320     static void registerUDF (const String& name, MakeUDFObject* func);
00321 
00322     // Initialize the function object.
00323     void init (const PtrBlock<TableExprNodeRep*>& arg,
00324                const Table& table, const TaQLStyle&);
00325 
00326     // Get the data type of the result.
00327     TableExprNodeRep::NodeDataType dataType() const
00328       { return itsDataType; }
00329 
00330     // Get the dimensionality of the results.
00331     // (0=scalar, -1=array with variable ndim, >0=array with fixed ndim)
00332     Int ndim() const
00333       { return itsNDim; }
00334 
00335     // Get the result shape if it is the same for all results.
00336     const IPosition& shape() const
00337       { return itsShape; }
00338 
00339     // Tell if the UDF gives a constant result.
00340     Bool isConstant() const
00341       { return itsIsConstant; }
00342 
00343     // Tell if the UDF is an aggregate function.
00344     Bool isAggregate() const
00345       { return itsIsAggregate; }
00346 
00347     // Do not apply the selection (clears the apply-selection flag).
00348     void disableApplySelection()
00349       { itsApplySelection = False; }
00350 
00351     // If needed, let the UDF re-create column objects for a selection of rows.
00352     // It calls the function recreateColumnObjects.
00353     void applySelection (const Vector<uInt>& rownrs);
00354 
00355     // Create a UDF object (thread-safe).
00356     // It looks in the map with fixed function names. If unknown,
00357     // it looks if a wildcarded function name is supported (for PyTaQL).
00358     static UDFBase* createUDF (const String& name, const TaQLStyle& style);
00359 
00360   private:
00361     //# Data members.
00362     PtrBlock<TableExprNodeRep*>    itsOperands;
00363     TableExprNodeRep::NodeDataType itsDataType;
00364     Int                            itsNDim;
00365     IPosition                      itsShape;
00366     String                         itsUnit;
00367     Bool                           itsIsConstant;
00368     Bool                           itsIsAggregate;
00369     Bool                           itsApplySelection;
00370     //# The registry is used for two purposes:
00371     //# 1. It is a map of known function names (lib.func) to funcptr.
00372     //#    Function name * means that the library can contain any function,
00373     //#    which is intended for python functions (through PyTaQL).
00374     //# 2. The loaded libraries are kept in the map (with 0 funcptr).
00375     static map<String, MakeUDFObject*> theirRegistry;
00376     static Mutex                       theirMutex;
00377   };
00378 
00379 } // end namespace
00380 
00381 #endif
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

Generated on 31 Aug 2016 for casa by  doxygen 1.6.1