cctbx: scitbx/line_search/more_thuente_1994

00001 #ifndef SCITBX_LINE_SEARCH_MORE_THUENTE_1994_RAW_H
00002 #define SCITBX_LINE_SEARCH_MORE_THUENTE_1994_RAW_H
00003 
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <stdexcept>
#include <vector>
00008 
00009 namespace scitbx { namespace line_search {
00010 
00011   template <typename FloatType=double>
00012   class mcsrch
00013   {
00014     protected:
00015       int infoc;
00016       FloatType dginit;
00017       bool brackt;
00018       bool stage1;
00019       FloatType finit;
00020       FloatType dgtest;
00021       FloatType width;
00022       FloatType width1;
00023       FloatType stx;
00024       FloatType fx;
00025       FloatType dgx;
00026       FloatType sty;
00027       FloatType fy;
00028       FloatType dgy;
00029       FloatType stmin;
00030       FloatType stmax;
00031       std::vector<FloatType> initial_x;
00032 
00033       static
00034       FloatType
00035       pow2(FloatType const& x) { return x * x; }
00036 
00037       static
00038       FloatType const&
00039       max3(
00040         FloatType const& x,
00041         FloatType const& y,
00042         FloatType const& z)
00043       {
00044         return x < y ? (y < z ? z : y ) : (x < z ? z : x );
00045       }
00046 
00047     public:
00048       const char* info_meaning;
00049 
00050       void
00051       free_workspace() { initial_x = std::vector<FloatType>(); }
00052 
00053       /* Minimize a function along a search direction. This code is
00054          a Java translation of the function <code>MCSRCH</code> from
00055          <code>lbfgs.f</code>, which in turn is a slight modification
00056          of the subroutine <code>CSRCH</code> of More' and Thuente.
00057          The changes are to allow reverse communication, and do not
00058          affect the performance of the routine. This function, in turn,
00059          calls <code>mcstep</code>.<p>
00060 
00061          The Java translation was effected mostly mechanically, with
00062          some manual clean-up; in particular, array indices start at 0
00063          instead of 1.  Most of the comments from the Fortran code have
00064          been pasted in here as well.<p>
00065 
00066          The purpose of <code>mcsrch</code> is to find a step which
00067          satisfies a sufficient decrease condition and a curvature
00068          condition.<p>
00069 
00070          At each stage this function updates an interval of uncertainty
00071          with endpoints <code>stx</code> and <code>sty</code>. The
00072          interval of uncertainty is initially chosen so that it
00073          contains a minimizer of the modified function
00074          <pre>
00075               f(x+stp*s) - f(x) - ftol*stp*(gradf(x)'s).
00076          </pre>
00077          If a step is obtained for which the modified function has a
00078          nonpositive function value and nonnegative derivative, then
00079          the interval of uncertainty is chosen so that it contains a
00080          minimizer of <code>f(x+stp*s)</code>.<p>
00081 
00082          The algorithm is designed to find a step which satisfies
00083          the sufficient decrease condition
00084          <pre>
00085                f(x+stp*s) &lt;= f(X) + ftol*stp*(gradf(x)'s),
00086          </pre>
00087          and the curvature condition
00088          <pre>
00089                abs(gradf(x+stp*s)'s)) &lt;= gtol*abs(gradf(x)'s).
00090          </pre>
00091          If <code>ftol</code> is less than <code>gtol</code> and if,
00092          for example, the function is bounded below, then there is
00093          always a step which satisfies both conditions. If no step can
00094          be found which satisfies both conditions, then the algorithm
00095          usually stops when rounding errors prevent further progress.
00096          In this case <code>stp</code> only satisfies the sufficient
00097          decrease condition.<p>
00098 
00099          @author Original Fortran version by Jorge J. More' and
00100            David J. Thuente as part of the Minpack project, June 1983,
00101            Argonne National Laboratory. Java translation by Robert
00102            Dodier, August 1997.
00103 
00104          @param n The number of variables.
00105 
00106          @param x On entry this contains the base point for the line
00107            search. On exit it contains <code>x + stp*s</code>.
00108 
00109          @param f On entry this contains the value of the objective
00110            function at <code>x</code>. On exit it contains the value
00111            of the objective function at <code>x + stp*s</code>.
00112 
00113          @param g On entry this contains the gradient of the objective
00114            function at <code>x</code>. On exit it contains the gradient
00115            at <code>x + stp*s</code>.
00116 
00117          @param s The search direction.
00118 
00119          @param stp On entry this contains an initial estimate of a
00120            satifactory step length. On exit <code>stp</code> contains
00121            the final estimate.
00122 
00123          @param ftol Tolerance for the sufficient decrease condition.
00124 
00125          @param xtol Termination occurs when the relative width of the
00126            interval of uncertainty is at most <code>xtol</code>.
00127 
00128          @param maxfev Termination occurs when the number of evaluations
00129            of the objective function is at least <code>maxfev</code> by
00130            the end of an iteration.
00131 
00132          @param info This is an output variable, which can have these
00133            values:
00134            <ul>
00135            <li><code>info = -1</code> A return is made to compute
00136                the function and gradient.
00137            <li><code>info = 1</code> The sufficient decrease condition
00138                and the directional derivative condition hold.
00139            </ul>
00140 
00141          @param nfev On exit, this is set to the number of function
00142            evaluations.
00143        */
00144       void
00145       run(
00146         FloatType const& gtol,
00147         FloatType const& stpmin,
00148         FloatType const& stpmax,
00149         unsigned n,
00150         FloatType* x,
00151         FloatType f,
00152         const FloatType* g,
00153         const FloatType* s,
00154         FloatType& stp,
00155         FloatType ftol,
00156         FloatType xtol,
00157         unsigned maxfev,
00158         int& info,
00159         unsigned& nfev);
00160 
00161       /* The purpose of this function is to compute a safeguarded step
00162          for a linesearch and to update an interval of uncertainty for
00163          a minimizer of the function.<p>
00164 
00165          The parameter <code>stx</code> contains the step with the
00166          least function value. The parameter <code>stp</code> contains
00167          the current step. It is assumed that the derivative at
00168          <code>stx</code> is negative in the direction of the step. If
00169          <code>brackt</code> is <code>true</code> when
00170          <code>mcstep</code> returns then a minimizer has been
00171          bracketed in an interval of uncertainty with endpoints
00172          <code>stx</code> and <code>sty</code>.<p>
00173 
00174          Variables that must be modified by <code>mcstep</code> are
00175          implemented as 1-element arrays.
00176 
00177          @param stx Step at the best step obtained so far.
00178            This variable is modified by <code>mcstep</code>.
00179          @param fx Function value at the best step obtained so far.
00180            This variable is modified by <code>mcstep</code>.
00181          @param dx Derivative at the best step obtained so far.
00182            The derivative must be negative in the direction of the
00183            step, that is, <code>dx</code> and <code>stp-stx</code> must
00184            have opposite signs.  This variable is modified by
00185            <code>mcstep</code>.
00186 
00187          @param sty Step at the other endpoint of the interval of
00188            uncertainty. This variable is modified by <code>mcstep</code>.
00189          @param fy Function value at the other endpoint of the interval
00190            of uncertainty. This variable is modified by
00191            <code>mcstep</code>.
00192 
00193          @param dy Derivative at the other endpoint of the interval of
00194            uncertainty. This variable is modified by <code>mcstep</code>.
00195 
00196          @param stp Step at the current step. If <code>brackt</code> is set
00197            then on input <code>stp</code> must be between <code>stx</code>
00198            and <code>sty</code>. On output <code>stp</code> is set to the
00199            new step.
00200          @param fp Function value at the current step.
00201          @param dp Derivative at the current step.
00202 
00203          @param brackt Tells whether a minimizer has been bracketed.
00204            If the minimizer has not been bracketed, then on input this
00205            variable must be set <code>false</code>. If the minimizer has
00206            been bracketed, then on output this variable is
00207            <code>true</code>.
00208 
00209          @param stpmin Lower bound for the step.
00210          @param stpmax Upper bound for the step.
00211 
00212          If the return value is 1, 2, 3, or 4, then the step has
00213          been computed successfully. A return value of 0 indicates
00214          improper input parameters.
00215 
00216          @author Jorge J. More, David J. Thuente: original Fortran version,
00217            as part of Minpack project. Argonne Nat'l Laboratory, June 1983.
00218            Robert Dodier: Java translation, August 1997.
00219        */
00220       static
00221       int
00222       mcstep(
00223         FloatType& stx,
00224         FloatType& fx,
00225         FloatType& dx,
00226         FloatType& sty,
00227         FloatType& fy,
00228         FloatType& dy,
00229         FloatType& stp,
00230         FloatType fp,
00231         FloatType dp,
00232         bool& brackt,
00233         FloatType stpmin,
00234         FloatType stpmax);
00235   };
00236 
00237   template <typename FloatType>
00238   void mcsrch<FloatType>::run(
00239     FloatType const& gtol,
00240     FloatType const& stpmin,
00241     FloatType const& stpmax,
00242     unsigned n,
00243     FloatType* x,
00244     FloatType f,
00245     const FloatType* g,
00246     const FloatType* s,
00247     FloatType& stp,
00248     FloatType ftol,
00249     FloatType xtol,
00250     unsigned maxfev,
00251     int& info,
00252     unsigned& nfev)
00253   {
00254     if (info != -1) {
00255       infoc = 1;
00256       if (   n == 0
00257           || maxfev == 0
00258           || gtol < FloatType(0)
00259           || xtol < FloatType(0)
00260           || stpmin < FloatType(0)
00261           || stpmax < stpmin) {
00262         throw std::runtime_error("Improper input parameters.");
00263       }
00264       if (stp <= FloatType(0) || ftol < FloatType(0)) {
00265         throw std::runtime_error("Improper value for stp or ftol.");
00266       }
00267       // Compute the initial gradient in the search direction
00268       // and check that s is a descent direction.
00269       dginit = FloatType(0);
00270       for (unsigned j = 0; j < n; j++) {
00271         dginit += g[j] * s[j];
00272       }
00273       if (dginit >= FloatType(0)) {
00274         throw std::runtime_error("Search direction not descent.");
00275       }
00276       brackt = false;
00277       stage1 = true;
00278       nfev = 0;
00279       finit = f;
00280       dgtest = ftol*dginit;
00281       width = stpmax - stpmin;
00282       width1 = FloatType(2) * width;
00283       initial_x.assign(x, x+n);
00284       // The variables stx, fx, dgx contain the values of the step,
00285       // function, and directional derivative at the best step.
00286       // The variables sty, fy, dgy contain the value of the step,
00287       // function, and derivative at the other endpoint of
00288       // the interval of uncertainty.
00289       // The variables stp, f, dg contain the values of the step,
00290       // function, and derivative at the current step.
00291       stx = FloatType(0);
00292       fx = finit;
00293       dgx = dginit;
00294       sty = FloatType(0);
00295       fy = finit;
00296       dgy = dginit;
00297     }
00298     for (;;) {
00299       if (info != -1) {
00300         // Set the minimum and maximum steps to correspond
00301         // to the present interval of uncertainty.
00302         if (brackt) {
00303           stmin = std::min(stx, sty);
00304           stmax = std::max(stx, sty);
00305         }
00306         else {
00307           stmin = stx;
00308           stmax = stp + FloatType(4) * (stp - stx);
00309         }
00310         // Force the step to be within the bounds stpmax and stpmin.
00311         stp = std::max(stp, stpmin);
00312         stp = std::min(stp, stpmax);
00313         // If an unusual termination is to occur then let
00314         // stp be the lowest point obtained so far.
00315         if (   (brackt && (stp <= stmin || stp >= stmax))
00316             || nfev >= maxfev - 1 || infoc == 0
00317             || (brackt && stmax - stmin <= xtol * stmax)) {
00318           stp = stx;
00319         }
00320         // Evaluate the function and gradient at stp
00321         // and compute the directional derivative.
00322         // We return to main program to obtain F and G.
00323         for (unsigned j = 0; j < n; j++) {
00324           x[j] = initial_x[j] + stp * s[j];
00325         }
00326         info = -1;
00327         info_meaning =
00328           "A return is made to compute the function and gradient.";
00329         break;
00330       }
00331       info = 0;
00332       info_meaning = 0;
00333       nfev++;
00334       FloatType dg(0);
00335       for (unsigned j = 0; j < n; j++) {
00336         dg += g[j] * s[j];
00337       }
00338       FloatType ftest1 = finit + stp*dgtest;
00339       // Test for convergence.
00340       if ((brackt && (stp <= stmin || stp >= stmax)) || infoc == 0) {
00341         info = 6;
00342         info_meaning =
00343           "Rounding errors prevent further progress."
00344           " There may not be a step which satisfies the"
00345           " sufficient decrease and curvature conditions."
00346           " Tolerances may be too small.";
00347         break;
00348       }
00349       if (stp == stpmax && f <= ftest1 && dg <= dgtest) {
00350         info = 5;
00351         info_meaning = "The step is at the upper bound stpmax.";
00352         break;
00353       }
00354       if (stp == stpmin && (f > ftest1 || dg >= dgtest)) {
00355         info = 4;
00356         info_meaning = "The step is at the lower bound stpmin.";
00357         break;
00358       }
00359       if (nfev >= maxfev) {
00360         info = 3;
00361         info_meaning = "Number of function evaluations has reached maxfev.";
00362         break;
00363       }
00364       if (brackt && stmax - stmin <= xtol * stmax) {
00365         info = 2;
00366         info_meaning =
00367           "Relative width of the interval of uncertainty"
00368           " is at most xtol.";
00369         break;
00370       }
00371       // Check for termination.
00372       if (f <= ftest1 && std::abs(dg) <= gtol * (-dginit)) {
00373         info = 1;
00374         info_meaning =
00375           "The sufficient decrease condition and the"
00376           " directional derivative condition hold.";
00377         break;
00378       }
00379       // In the first stage we seek a step for which the modified
00380       // function has a nonpositive value and nonnegative derivative.
00381       if (   stage1 && f <= ftest1
00382           && dg >= std::min(ftol, gtol) * dginit) {
00383         stage1 = false;
00384       }
00385       // A modified function is used to predict the step only if
00386       // we have not obtained a step for which the modified
00387       // function has a nonpositive function value and nonnegative
00388       // derivative, and if a lower function value has been
00389       // obtained but the decrease is not sufficient.
00390       if (stage1 && f <= fx && f > ftest1) {
00391         // Define the modified function and derivative values.
00392         FloatType fm = f - stp*dgtest;
00393         FloatType fxm = fx - stx*dgtest;
00394         FloatType fym = fy - sty*dgtest;
00395         FloatType dgm = dg - dgtest;
00396         FloatType dgxm = dgx - dgtest;
00397         FloatType dgym = dgy - dgtest;
00398         // Call cstep to update the interval of uncertainty
00399         // and to compute the new step.
00400         infoc = mcstep(stx, fxm, dgxm, sty, fym, dgym, stp, fm, dgm,
00401                        brackt, stmin, stmax);
00402         // Reset the function and gradient values for f.
00403         fx = fxm + stx*dgtest;
00404         fy = fym + sty*dgtest;
00405         dgx = dgxm + dgtest;
00406         dgy = dgym + dgtest;
00407       }
00408       else {
00409         // Call mcstep to update the interval of uncertainty
00410         // and to compute the new step.
00411         infoc = mcstep(stx, fx, dgx, sty, fy, dgy, stp, f, dg,
00412                        brackt, stmin, stmax);
00413       }
00414       // Force a sufficient decrease in the size of the
00415       // interval of uncertainty.
00416       if (brackt) {
00417         if (std::abs(sty - stx) >= FloatType(0.66) * width1) {
00418           stp = stx + FloatType(0.5) * (sty - stx);
00419         }
00420         width1 = width;
00421         width = std::abs(sty - stx);
00422       }
00423     }
00424   }
00425 
00426   template <typename FloatType>
00427   int mcsrch<FloatType>::mcstep(
00428     FloatType& stx,
00429     FloatType& fx,
00430     FloatType& dx,
00431     FloatType& sty,
00432     FloatType& fy,
00433     FloatType& dy,
00434     FloatType& stp,
00435     FloatType fp,
00436     FloatType dp,
00437     bool& brackt,
00438     FloatType stpmin,
00439     FloatType stpmax)
00440   {
00441     bool bound;
00442     FloatType gamma, p, q, r, s, sgnd, stpc, stpf, stpq, theta;
00443     int info = 0;
00444     if (   (   brackt && (stp <= std::min(stx, sty)
00445             || stp >= std::max(stx, sty)))
00446         || dx * (stp - stx) >= FloatType(0) || stpmax < stpmin) {
00447       return 0;
00448     }
00449     // Determine if the derivatives have opposite sign.
00450     sgnd = dp * (dx / std::abs(dx));
00451     if (fp > fx) {
00452       // First case. A higher function value.
00453       // The minimum is bracketed. If the cubic step is closer
00454       // to stx than the quadratic step, the cubic step is taken,
00455       // else the average of the cubic and quadratic steps is taken.
00456       info = 1;
00457       bound = true;
00458       theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp;
00459       s = max3(std::abs(theta), std::abs(dx), std::abs(dp));
00460       gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s));
00461       if (stp < stx) gamma = - gamma;
00462       p = (gamma - dx) + theta;
00463       q = ((gamma - dx) + gamma) + dp;
00464       r = p/q;
00465       stpc = stx + r * (stp - stx);
00466       stpq = stx
00467         + ((dx / ((fx - fp) / (stp - stx) + dx)) / FloatType(2))
00468           * (stp - stx);
00469       if (std::abs(stpc - stx) < std::abs(stpq - stx)) {
00470         stpf = stpc;
00471       }
00472       else {
00473         stpf = stpc + (stpq - stpc) / FloatType(2);
00474       }
00475       brackt = true;
00476     }
00477     else if (sgnd < FloatType(0)) {
00478       // Second case. A lower function value and derivatives of
00479       // opposite sign. The minimum is bracketed. If the cubic
00480       // step is closer to stx than the quadratic (secant) step,
00481       // the cubic step is taken, else the quadratic step is taken.
00482       info = 2;
00483       bound = false;
00484       theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp;
00485       s = max3(std::abs(theta), std::abs(dx), std::abs(dp));
00486       gamma = s * std::sqrt(pow2(theta / s) - (dx / s) * (dp / s));
00487       if (stp > stx) gamma = - gamma;
00488       p = (gamma - dp) + theta;
00489       q = ((gamma - dp) + gamma) + dx;
00490       r = p/q;
00491       stpc = stp + r * (stx - stp);
00492       stpq = stp + (dp / (dp - dx)) * (stx - stp);
00493       if (std::abs(stpc - stp) > std::abs(stpq - stp)) {
00494         stpf = stpc;
00495       }
00496       else {
00497         stpf = stpq;
00498       }
00499       brackt = true;
00500     }
00501     else if (std::abs(dp) < std::abs(dx)) {
00502       // Third case. A lower function value, derivatives of the
00503       // same sign, and the magnitude of the derivative decreases.
00504       // The cubic step is only used if the cubic tends to infinity
00505       // in the direction of the step or if the minimum of the cubic
00506       // is beyond stp. Otherwise the cubic step is defined to be
00507       // either stpmin or stpmax. The quadratic (secant) step is also
00508       // computed and if the minimum is bracketed then the the step
00509       // closest to stx is taken, else the step farthest away is taken.
00510       info = 3;
00511       bound = true;
00512       theta = FloatType(3) * (fx - fp) / (stp - stx) + dx + dp;
00513       s = max3(std::abs(theta), std::abs(dx), std::abs(dp));
00514       gamma = s * std::sqrt(
00515         std::max(FloatType(0), pow2(theta / s) - (dx / s) * (dp / s)));
00516       if (stp > stx) gamma = -gamma;
00517       p = (gamma - dp) + theta;
00518       q = (gamma + (dx - dp)) + gamma;
00519       r = p/q;
00520       if (r < FloatType(0) && gamma != FloatType(0)) {
00521         stpc = stp + r * (stx - stp);
00522       }
00523       else if (stp > stx) {
00524         stpc = stpmax;
00525       }
00526       else {
00527         stpc = stpmin;
00528       }
00529       stpq = stp + (dp / (dp - dx)) * (stx - stp);
00530       if (brackt) {
00531         if (std::abs(stp - stpc) < std::abs(stp - stpq)) {
00532           stpf = stpc;
00533         }
00534         else {
00535           stpf = stpq;
00536         }
00537       }
00538       else {
00539         if (std::abs(stp - stpc) > std::abs(stp - stpq)) {
00540           stpf = stpc;
00541         }
00542         else {
00543           stpf = stpq;
00544         }
00545       }
00546     }
00547     else {
00548       // Fourth case. A lower function value, derivatives of the
00549       // same sign, and the magnitude of the derivative does
00550       // not decrease. If the minimum is not bracketed, the step
00551       // is either stpmin or stpmax, else the cubic step is taken.
00552       info = 4;
00553       bound = false;
00554       if (brackt) {
00555         theta = FloatType(3) * (fp - fy) / (sty - stp) + dy + dp;
00556         s = max3(std::abs(theta), std::abs(dy), std::abs(dp));
00557         gamma = s * std::sqrt(pow2(theta / s) - (dy / s) * (dp / s));
00558         if (stp > sty) gamma = -gamma;
00559         p = (gamma - dp) + theta;
00560         q = ((gamma - dp) + gamma) + dy;
00561         r = p/q;
00562         stpc = stp + r * (sty - stp);
00563         stpf = stpc;
00564       }
00565       else if (stp > stx) {
00566         stpf = stpmax;
00567       }
00568       else {
00569         stpf = stpmin;
00570       }
00571     }
00572     // Update the interval of uncertainty. This update does not
00573     // depend on the new step or the case analysis above.
00574     if (fp > fx) {
00575       sty = stp;
00576       fy = fp;
00577       dy = dp;
00578     }
00579     else {
00580       if (sgnd < FloatType(0)) {
00581         sty = stx;
00582         fy = fx;
00583         dy = dx;
00584       }
00585       stx = stp;
00586       fx = fp;
00587       dx = dp;
00588     }
00589     // Compute the new step and safeguard it.
00590     stpf = std::min(stpmax, stpf);
00591     stpf = std::max(stpmin, stpf);
00592     stp = stpf;
00593     if (brackt && bound) {
00594       if (sty > stx) {
00595         stp = std::min(stx + FloatType(0.66) * (sty - stx), stp);
00596       }
00597       else {
00598         stp = std::max(stx + FloatType(0.66) * (sty - stx), stp);
00599       }
00600     }
00601     return info;
00602   }
00603 
00604 }} // namespace scitbx::line_search
00605 
00606 #endif // SCITBX_LINE_SEARCH_MORE_THUENTE_1994_RAW_H
scitbx/line_search/more_thuente_1994_raw.h