00001 //# UDFBase.h: Abstract base class for a user-defined TaQL function 00002 //# Copyright (C) 2010 00003 //# Associated Universities, Inc. Washington DC, USA. 00004 //# 00005 //# This library is free software; you can redistribute it and/or modify it 00006 //# under the terms of the GNU Library General Public License as published by 00007 //# the Free Software Foundation; either version 2 of the License, or (at your 00008 //# option) any later version. 00009 //# 00010 //# This library is distributed in the hope that it will be useful, but WITHOUT 00011 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 00012 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public 00013 //# License for more details. 00014 //# 00015 //# You should have received a copy of the GNU Library General Public License 00016 //# along with this library; if not, write to the Free Software Foundation, 00017 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA. 00018 //# 00019 //# Correspondence concerning AIPS++ should be addressed as follows: 00020 //# Internet email: aips2-request@nrao.edu. 
00021 //# Postal address: AIPS++ Project Office 00022 //# National Radio Astronomy Observatory 00023 //# 520 Edgemont Road 00024 //# Charlottesville, VA 22903-2475 USA 00025 //# 00026 //# $Id: UDFBase.h 21262 2012-09-07 12:38:36Z gervandiepen $ 00027 00028 #ifndef TABLES_UDFBASE_H 00029 #define TABLES_UDFBASE_H 00030 00031 //# Includes 00032 #include <casacore/casa/aips.h> 00033 #include <casacore/tables/TaQL/ExprNodeRep.h> 00034 #include <casacore/tables/Tables/Table.h> 00035 #include <casacore/tables/TaQL/TaQLStyle.h> 00036 #include <casacore/casa/Containers/Block.h> 00037 #include <casacore/casa/OS/Mutex.h> 00038 #include <casacore/casa/stdmap.h> 00039 00040 00041 namespace casacore { 00042 00043 // <summary> 00044 // Abstract base class for a user-defined TaQL function 00045 // </summary> 00046 // 00047 // <synopsis> 00048 // This class makes it possible to add user-defined functions (UDF) to TaQL. 00049 // A UDF has to be implemented in a class derived from this class and can 00050 // contain one or more user-defined functions. 00051 // <br>A few functions have to be implemented in the class as described below. 00052 // In this way TaQL can be extended with arbitrary functions, which can be 00053 // normal functions as well as aggregate functions (often used with GROUPBY). 00054 // 00055 // A UDF is a class derived from this base class. It must contain the 00056 // following member functions. See also the example below. 00057 // <table border=0> 00058 // <tr> 00059 // <td><src>makeObject</src></td> 00060 // <td>a static function to create an object of the UDF class. This function 00061 // needs to be registered. 00062 // </td> 00063 // </tr> 00064 // <tr> 00065 // <td><src>setup</src></td> 00066 // <td>this virtual function is called after the object has been created. 00067 // It should initialize the object using the function arguments that 00068 // can be obtained using the function <src>operands()</src>. 
The setup 00069 // function should perform the following: 00070 // <ul> 00071 // <li>Define the data type of the result using <src>setDataType</src>. 00072 // The data type should be derived from the data types of the function 00073 // arguments. The possible data types are defined in class 00074 // TableExprNodeRep. 00075 // Note that a UDF can support multiple data types. For example, a 00076 // function like <src>min</src> can be used for Int, Double, or a mix. 00077 // Function 'checkDT' in class TableExprNodeMulti can be used to 00078 // check the data types of the operands and determine the result 00079 // data type. 00080 // <li>Define if the function is an aggregate function calculating 00081 // an aggregated value in a group (e.g., minimum or mean). 00082 // <src>setAggregate</src> can be used to indicate this. 00083 // <li>Define the dimensionality of the result using <src>setNDim</src>. 00084 // A value of 0 means a scalar. A value of -1 means an array with 00085 // a dimensionality that can vary from row to row. 00086 // <li>Optionally use <src>setShape</src> to define the shape if the 00087 // results are arrays with a shape that is the same for all rows. 00088 // It will also set ndim if setNDim was not used yet, otherwise 00089 // it checks if ndim matches. 00090 // <li>Optionally set the unit of the result using <src>setUnit</src>. 00091 // TaQL has full support of units, so UDFs should behave the same. 00092 // It is possible to change the unit of the function arguments. 00093 // For example: 00094 // <ul> 00095 // <li>A function like 'sin' can force its argument to be 00096 // in radians; TaQL will scale the argument as needed. This can be 00097 // done like 00098 // <src>TableExprNodeUnit::adaptUnit (operands()[i], "rad");</src> 00099 // <li>A function like 'asin' will have a result in radians. 00100 // Such a UDF should set its result unit to rad. 
00101 // <li>A function like 'min' wants its arguments to have the same 00102 // unit and will set its result unit to it. It can be done like: 00103 // <src>setUnit (TableExprFuncNode::makeEqualUnits 00104 // (operands(), 0, operands().size()));</src> 00105 // </ul> 00106 // See class TableExprFuncNode for more info about these functions. 00107 // <li>Optionally define if the result is a constant value using 00108 // <src>setConstant</src>. It means that the function is not 00109 // dependent on the row number in the table being queried. 00110 // This is usually the case if all UDF arguments are constant. 00111 // </ul> 00112 // </td> 00113 // </tr> 00114 // <tr> 00115 // <td><src>getXXX</src></td> 00116 // <td>these are virtual get functions for each possible data type. The 00117 // get functions matching the data types set by the setup 00118 // function need to be implemented. 00119 // The <src>get</src> functions have an argument TableExprId 00120 // defining the table row (or record) for which the function has 00121 // to be evaluated. 00122 // If the UDF is an aggregate function, the TableExprId has to be 00123 // cast to a TableExprIdAggr object from which all TableExprId 00124 // objects in an aggregation group can be retrieved. 00125 // <srcblock> 00126 // const TableExprIdAggr& aid = TableExprIdAggr::cast (id); 00127 // const vector<TableExprId>& ids = aid.result().ids(id.rownr()); 00128 // </srcblock> 00129 // </td> 00130 // </tr> 00131 // </table> 00132 // 00133 // A UDF has to be made known to TaQL by adding it to the UDF registry with 00134 // its name and 'makeObject' function. 00135 // UDFs will usually reside in a shared library that is loaded dynamically. 00136 // TaQL will load a UDF in the following way: 00137 // <ul> 00138 // <li> The UDF name used in TaQL consists of two parts: a library name 00139 // and a function name separated by a dot. Both parts need to be given. 
00140 // Note that the library name can also be seen as a UDF scope, so 00141 // different UDFs with equal names can be used from different libraries. 00142 // A UDF should be registered with this full name. 00143 // <br>The "USING STYLE" clause can be used to define a synonym for 00144 // a (long) library name in the TaQLStyle object. The library part 00145 // of the UDF will always be looked up in this synonym map. 00146 // <li> If a UDF is not found in the registry, an attempt is made to load 00147 // a shared library using the library name part. The libraries tried 00148 // to be loaded are lib<library>.so and libcasa_<library>.so. 00149 // On Mac .dylib will be tried. If loaded successfully, a special 00150 // function 'register_libname' will be called first. It should 00151 // register each UDF in the shared library using UDFBase::registerUDF. 00152 // </ul> 00153 // </synopsis> 00154 // 00155 // <example> 00156 // The following example shows a normal UDF. 00157 // <br>It returns True if the function argument matches 1. 00158 // It can be seen that it checks if the argument is an integer scalar. 00159 // <srcblock> 00160 // class TestUDF: public UDFBase 00161 // { 00162 // public: 00163 // TestUDF() {} 00164 // // Registered function to create the UDF object. 00165 // // The name of the function is not important here. 00166 // static UDFBase* makeObject (const String&) 00167 // { return new TestUDF(); } 00168 // // Setup and check the details; result is a bool scalar value. 00169 // virtual void setup (const Table&, const TaQLStyle&) 00170 // { 00171 // AlwaysAssert (operands().size() == 1, AipsError); 00172 // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt, 00173 // AipsError); 00174 // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar, 00175 // AipsError); 00176 // setDataType (TableExprNodeRep::NTBool); 00177 // setNDim (0); // scalar result 00178 // setConstant (operands()[0]->isConstant()); // constant result? 
00179 // } 00180 // // Get the value for the given id. 00181 // // It gets the value of the operand and checks if it is 1. 00182 // Bool getBool (const TableExprId& id) 00183 // { return operands()[0]->getInt(id) == 1; } 00184 // }; 00185 // </srcblock> 00186 // </example> 00187 00188 // <example> 00189 // The following example shows an aggregate UDF function. 00190 // It calculates the sum of the cubes of the values in a group. 00191 // <srcblock> 00192 // class TestUDFAggr: public UDFBase 00193 // { 00194 // public: 00195 // TestUDFAggr() {} 00196 // // Registered function to create the UDF object. 00197 // // The name of the function is not important here. 00198 // static UDFBase* makeObject (const String&) { return new TestUDFAggr(); } 00199 // // Setup and check the details; result is an integer scalar value. 00200 // // It aggregates the values of multiple rows. 00201 // virtual void setup (const Table&, const TaQLStyle&) 00202 // { 00203 // AlwaysAssert (operands().size() == 1, AipsError); 00204 // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt, AipsError); 00205 // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar, AipsError); 00206 // setDataType (TableExprNodeRep::NTInt); 00207 // setNDim (0); // scalar 00208 // setAggregate (True); // aggregate function 00209 // } 00210 // // Get the value of a group. 00211 // // It aggregates the values of multiple rows. 00212 // Int64 getInt (const TableExprId& id) 00213 // { 00214 // // Cast the id to a TableExprIdAggr object. 00215 // const TableExprIdAggr& aid = TableExprIdAggr::cast (id); 00216 // // Get the vector of ids for this group. 00217 // const vector<TableExprId>& ids = aid.result().ids(id.rownr()); 00218 // // Get the values for all ids and accumulate them. 
//       Int64 sum3 = 0;
//       for (vector<TableExprId>::const_iterator it=ids.begin();
//            it!=ids.end(); ++it){
//         Int64 v = operands()[0]->getInt(*it);
//         sum3 += v*v*v;
//       }
//       return sum3;
//     }
//   };
// </srcblock>
// </example>
// More examples of UDF functions can be found in classes UDFMSCal
// and DirectionUDF.

  class UDFBase
  {
  public:
    // The signature of a global or static member function creating an object
    // of the UDF.
    // Note that this is a function type (not a pointer type); registerUDF
    // takes a pointer to such a function.
    typedef UDFBase* MakeUDFObject (const String& functionName);

    // Only default constructor is needed.
    UDFBase();

    // Destructor.
    // It is virtual because UDF objects are handled through UDFBase pointers
    // (see MakeUDFObject and createUDF).
    virtual ~UDFBase();

    // Evaluate the function for the given id and return the result.
    // There is one function per possible result data type (scalar and array).
    // A derived class overrides the ones matching the data type it defined
    // in its setup function.
    // Their default implementations throw a "not implemented" exception.
    // <group>
    virtual Bool      getBool     (const TableExprId& id);
    virtual Int64     getInt      (const TableExprId& id);
    virtual Double    getDouble   (const TableExprId& id);
    virtual DComplex  getDComplex (const TableExprId& id);
    virtual String    getString   (const TableExprId& id);
    virtual TaqlRegex getRegex    (const TableExprId& id);
    virtual MVTime    getDate     (const TableExprId& id);
    virtual MArray<Bool>     getArrayBool     (const TableExprId& id);
    virtual MArray<Int64>    getArrayInt      (const TableExprId& id);
    virtual MArray<Double>   getArrayDouble   (const TableExprId& id);
    virtual MArray<DComplex> getArrayDComplex (const TableExprId& id);
    virtual MArray<String>   getArrayString   (const TableExprId& id);
    virtual MArray<MVTime>   getArrayDate     (const TableExprId& id);
    // </group>

    // Get the unit of the result (as set using setUnit).
    const String& getUnit() const
      { return itsUnit; }

    // Get the nodes in the function operands representing an aggregate function.
    // The nodes are returned through the output argument <src>aggr</src>.
    void getAggrNodes (vector<TableExprNodeRep*>& aggr);

    // Get the nodes in the function operands representing a table column.
    // The nodes are returned through the output argument <src>cols</src>.
    void getColumnNodes (vector<TableExprNodeRep*>& cols);

  private:
    // Set up the function object.
    // It is pure virtual, so each concrete UDF class has to implement it.
    // It should define the result data type, dimensionality, etc. using the
    // protected set functions below.
    // <br>NOTE(review): being private, it is presumably invoked through
    // function <src>init</src>; confirm in the implementation.
    virtual void setup (const Table& table,
                        const TaQLStyle&) = 0;

  protected:
    // Get the operands (i.e., the nodes of the function arguments).
    PtrBlock<TableExprNodeRep*>& operands()
      { return itsOperands; }

    // Set the data type.
    // This function must be called by the setup function of the derived class.
    void setDataType (TableExprNodeRep::NodeDataType);

    // Set the dimensionality of the results.
    // <br> 0 means that the results are scalars.
    // <br> -1 means that the results are arrays with unknown dimensionality.
    // <br> >0 means that the results are arrays with that dimensionality.
    // This function must be called by the setup function of the derived class.
    void setNDim (Int ndim);

    // Set the shape of the results if it is fixed and known.
    void setShape (const IPosition& shape);

    // Set the unit of the result.
    // If this function is not called by the setup function of the derived
    // class, the result has no unit.
    void setUnit (const String& unit);

    // Define if the result is constant (e.g. if all arguments are constant).
    // If this function is not called by the setup function of the derived
    // class, the result is not constant.
    void setConstant (Bool isConstant);

    // Define if the UDF is an aggregate function (usually used in GROUPBY).
    void setAggregate (Bool isAggregate);

    // Let a derived class recreate its column objects in case a selection
    // has to be applied.
    // The default implementation does nothing.
    virtual void recreateColumnObjects (const Vector<uInt>& rownrs);

  public:
    // Register the name and construction function of a UDF (thread-safe).
    // An exception is thrown if this name already exists with a different
    // construction function.
    static void registerUDF (const String& name, MakeUDFObject* func);

    // Initialize the function object with its operands, the table being
    // queried, and the TaQL style.
    void init (const PtrBlock<TableExprNodeRep*>& arg,
               const Table& table, const TaQLStyle&);

    // Get the data type.
    TableExprNodeRep::NodeDataType dataType() const
      { return itsDataType; }

    // Get the dimensionality of the results.
    // (0=scalar, -1=array with variable ndim, >0=array with fixed ndim)
    Int ndim() const
      { return itsNDim; }

    // Get the result shape if the same for all results.
    const IPosition& shape() const
      { return itsShape; }

    // Tell if the UDF gives a constant result.
    Bool isConstant() const
      { return itsIsConstant; }

    // Tell if the UDF is an aggregate function.
    Bool isAggregate() const
      { return itsIsAggregate; }

    // Do not apply the selection.
    // It clears the flag itsApplySelection.
    void disableApplySelection()
      { itsApplySelection = False; }

    // If needed, let the UDF re-create column objects for a selection of rows.
    // It calls the function recreateColumnObjects.
    void applySelection (const Vector<uInt>& rownrs);

    // Create a UDF object (thread-safe).
    // It looks in the map with fixed function names. If unknown,
    // it looks if a wildcarded function name is supported (for PyTaQL).
    // <br>NOTE(review): ownership of the returned object is not visible in
    // this header; confirm with the implementation and its callers.
    static UDFBase* createUDF (const String& name, const TaQLStyle& style);

  private:
    //# Data members.
    //# The operands (function arguments) as returned by operands().
    PtrBlock<TableExprNodeRep*>    itsOperands;
    //# The result data type as returned by dataType().
    TableExprNodeRep::NodeDataType itsDataType;
    //# The result dimensionality as returned by ndim().
    Int                            itsNDim;
    //# The result shape as returned by shape().
    IPosition                      itsShape;
    //# The result unit as returned by getUnit().
    String                         itsUnit;
    //# Is the result constant? Returned by isConstant().
    Bool                           itsIsConstant;
    //# Is it an aggregate function? Returned by isAggregate().
    Bool                           itsIsAggregate;
    //# Set to False by disableApplySelection().
    Bool                           itsApplySelection;
    //# The registry is used for two purposes:
    //# 1. It is a map of known function names (lib.func) to funcptr.
    //#    Function name * means that the library can contain any function,
    //#    which is intended for python functions (through PyTaQL).
    //# 2. The loaded libraries are kept in the map (with 0 funcptr).
    static map<String, MakeUDFObject*> theirRegistry;
    //# NOTE(review): presumably guards theirRegistry, because registerUDF and
    //# createUDF are documented as thread-safe; confirm in the implementation.
    static Mutex                       theirMutex;
  };

} // end namespace

#endif