FORM  4.3
optimize.cc
Go to the documentation of this file.
1 
6 /* #[ License : */
7 /*
8  * Copyright (C) 1984-2022 J.A.M. Vermaseren
9  * When using this file you are requested to refer to the publication
10  * J.A.M.Vermaseren "New features of FORM" math-ph/0010025
11  * This is considered a matter of courtesy as the development was paid
12  * for by FOM the Dutch physics granting agency and we would like to
13  * be able to track its scientific use to convince FOM of its value
14  * for the community.
15  *
16  * This file is part of FORM.
17  *
18  * FORM is free software: you can redistribute it and/or modify it under the
19  * terms of the GNU General Public License as published by the Free Software
20  * Foundation, either version 3 of the License, or (at your option) any later
21  * version.
22  *
23  * FORM is distributed in the hope that it will be useful, but WITHOUT ANY
24  * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25  * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
26  * details.
27  *
28  * You should have received a copy of the GNU General Public License along
29  * with FORM. If not, see <http://www.gnu.org/licenses/>.
30  */
31 /*
32  #] License :
33  #[ includes :
34 */
35 
36 //#define DEBUG
37 //#define DEBUG_MORE
38 //#define DEBUG_MCTS
39 //#define DEBUG_GREEDY
40 
41 #ifdef HAVE_CONFIG_H
42 #ifndef CONFIG_H_INCLUDED
43 #define CONFIG_H_INCLUDED
44 #include <config.h>
45 #endif
46 #endif
47 
48 #include <vector>
49 #include <stack>
50 #include <algorithm>
51 #include <set>
52 #include <map>
53 #include <climits>
54 #include <cmath>
55 #include <string>
56 #include <algorithm>
57 #include <iostream>
58 
59 #ifdef APPLE64
60 #define HAVE_UNORDERED_MAP
61 #define HAVE_UNORDERED_SET
62 #endif
63 
64 #ifdef HAVE_UNORDERED_MAP
65  #include <unordered_map>
66  using std::unordered_map;
67 #elif !defined(HAVE_TR1_UNORDERED_MAP) && defined(HAVE_BOOST_UNORDERED_MAP_HPP)
68  #include <boost/unordered_map.hpp>
69  using boost::unordered_map;
70 #else
71  #include <tr1/unordered_map>
72  using std::tr1::unordered_map;
73 #endif
74 #ifdef HAVE_UNORDERED_SET
75  #include <unordered_set>
76  using std::unordered_set;
77 #elif !defined(HAVE_TR1_UNORDERED_SET) && defined(HAVE_BOOST_UNORDERED_SET_HPP)
78  #include <boost/unordered_set.hpp>
79  using boost::unordered_set;
80 #else
81  #include <tr1/unordered_set>
82  using std::tr1::unordered_set;
83 #endif
84 
85 #if defined(HAVE_BUILTIN_POPCOUNT)
86  static inline int popcount(unsigned int x) {
87  return __builtin_popcount(x);
88  }
89 #elif defined(HAVE_POPCNT)
90  #include <intrin.h>
91  static inline int popcount(unsigned int x) {
92  return __popcnt(x);
93  }
94 #else
95  static inline int popcount(unsigned int x) {
96  int count = 0;
97  while (x > 0) {
98  if ((x & 1) == 1) count++;
99  x >>= 1;
100  }
101  return count;
102  }
103 #endif
104 
105 extern "C" {
106 #include "form3.h"
107 }
108 
109 //#ifdef DEBUG
110 #include "mytime.h"
111 //#endif
112 
113 using namespace std;
114 
115 // operators
116 const WORD OPER_ADD = -1;
117 const WORD OPER_MUL = -2;
118 const WORD OPER_COMMA = -3;
119 
120 // class for a node of the MCTS tree
121 class tree_node {
122 public:
123  vector<tree_node> childs;
124  double sum_results;
125  int num_visits;
126  WORD var;
127  bool finished;
128  PADPOINTER(1,1,1,1);
129 
130  tree_node (int _var=0):
131  sum_results(0), num_visits(0), var(_var), finished(false) {}
132 };
133 
134 // global variables for multithreading
135 WORD *optimize_expr;
136 vector<vector<WORD> > optimize_best_Horner_schemes;
137 int optimize_num_vars;
138 int optimize_best_num_oper;
139 vector<WORD> optimize_best_instr;
140 vector<WORD> optimize_best_vars;
141 
142 // global variables for MCTS
143 bool mcts_factorized, mcts_separated;
144 vector<WORD> mcts_vars;
145 tree_node mcts_root;
146 int mcts_expr_score;
147 set<pair<int,vector<WORD> > > mcts_best_schemes;
148 
149 #ifdef WITHPTHREADS
150 pthread_mutex_t optimize_lock;
151 #endif
152 
153 /*
154  #] includes :
155  #[ print_instr :
156 */
157 
158 void print_instr (const vector<WORD> &instr, WORD num)
159 {
160  const WORD *tbegin = &*instr.begin();
161  const WORD *tend = tbegin+instr.size();
162  for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
163  MesPrint("<%d> %a",num,t[2],t);
164  }
165 }
166 
167 /*
168  #] print_instr :
169  #[ my_random_shuffle :
170 */
171 
179 template <class RandomAccessIterator>
180 void my_random_shuffle (PHEAD RandomAccessIterator fr, RandomAccessIterator to) {
181  for (int i=to-fr-1; i>0; --i)
182  swap (fr[i],fr[wranf(BHEAD0) % (i+1)]);
183 }
184 
185 /*
186  #] my_random_shuffle :
187  #[ get_expression :
188 */
189 
190 static WORD comlist[3] = {TYPETOPOLYNOMIAL,3,DOALL};
191 
204 LONG get_expression (int exprnr) {
205 
206  GETIDENTITY;
207 
208  AR.NoCompress = 1;
209 
210  NewSort(BHEAD0);
211  EXPRESSIONS e = Expressions+exprnr;
212  SetScratch(AR.infile,&(e->onfile));
213 
214  // get header term
215  WORD *term = AT.WorkPointer;
216  GetTerm(BHEAD term);
217 
218  NewSort(BHEAD0);
219 
220  // get terms
221  while (GetTerm(BHEAD term) > 0) {
222  AT.WorkPointer = term + *term;
223  WORD *t1 = term;
224  WORD *t2 = term + *term;
225  if (ConvertToPoly(BHEAD t1,t2,comlist,1) < 0) return -1;
226  int n = *t2;
227  NCOPY(t1,t2,n);
228  AT.WorkPointer = term + *term;
229  if (StoreTerm(BHEAD term)) return -1;
230  }
231 
232  // sort and store in buffer
233  LONG len = EndSort(BHEAD (WORD *)((VOID *)(&optimize_expr)),2);
234  LowerSortLevel();
235  AT.WorkPointer = term;
236 
237  return len;
238 }
239 
240 /*
241  #] get_expression :
242  #[ PF_get_expression :
243 */
244 #ifdef WITHMPI
245 
246 // get_expression for ParFORM
247 LONG PF_get_expression (int exprnr) {
248  LONG len;
249  if (PF.me == MASTER) {
250  len = get_expression(exprnr);
251  }
252  if (PF.numtasks > 1) {
253  PF_BroadcastBuffer(&optimize_expr, &len);
254  }
255  return len;
256 }
257 
258 // replace get_expression called in Optimize
259 #define get_expression PF_get_expression
260 
261 #endif
262 /*
263  #] PF_get_expression :
264  #[ get_brackets :
265 */
266 
281 vector<vector<WORD> > get_brackets () {
282 
283  // check for brackets in expression
284  bool has_brackets = false;
285  for (WORD *t=optimize_expr; *t!=0; t+=*t) {
286  WORD *tend=t+*t; tend-=ABS(*(tend-1));
287  for (WORD *t1=t+1; t1<tend; t1+=*(t1+1))
288  if (*t1 == HAAKJE)
289  has_brackets=true;
290  }
291 
292  // replace brackets by SEPARATESYMBOL
293  vector<vector<WORD> > brackets;
294 
295  if (has_brackets) {
296  int exprlen=10; // we need potential space for an empty bracket
297  for (WORD *t=optimize_expr; *t!=0; t+=*t)
298  exprlen += *t;
299  WORD *newexpr = (WORD *)Malloc1(exprlen*sizeof(WORD), "optimize newexpr");
300 
301  int i=0;
302  int sep_power = 0;
303 
304  for (WORD *t=optimize_expr; *t!=0; t+=*t) {
305  WORD *t1 = t+1;
306 
307  // scan for bracket
308  vector<WORD> bracket;
309  for (; *t1!=HAAKJE; t1+=*(t1+1))
310  bracket.insert(bracket.end(), t1, t1+*(t1+1));
311 
312  if (brackets.size()==0 || bracket!=brackets.back()) {
313  sep_power++;
314  brackets.push_back(bracket);
315  }
316  t1+=*(t1+1);
317 
318  WORD left = t + *t - t1;
319  bool more_symbols = (left != ABS(*(t+*t-1)));
320 
321  // add power of SEPARATESYMBOL
322  newexpr[i++] = 1 + left + (more_symbols ? 2 : 4);
323  newexpr[i++] = SYMBOL;
324  newexpr[i++] = (more_symbols ? *(t1+1) + 2 : 4);
325  newexpr[i++] = SEPARATESYMBOL;
326  newexpr[i++] = sep_power;
327 
328  // add remaining terms
329  if (more_symbols) {
330  t1+=2;
331  left-=2;
332  }
333  while (left-->0)
334  newexpr[i++] = *(t1++);
335  }
336 /*
337  We insert here an empty bracket that is zero.
338  It is used for the case that there is only a single bracket which is
339  outside the notation for trees at a later stage.
340 
341  There may be a problem now in that in the case of sep_power==1
342  newexpr is bigger than optimize_expr. We have to check that.
343 */
344  if ( sep_power == 1 )
345  {
346  WORD *t;
347  vector<WORD> bracket;
348  bracket.push_back(0);
349  bracket.push_back(0);
350  bracket.push_back(0);
351  bracket.push_back(0);
352  sep_power++;
353  brackets.push_back(bracket);
354  newexpr[i++] = 8;
355  newexpr[i++] = SYMBOL;
356  newexpr[i++] = 4;
357  newexpr[i++] = SEPARATESYMBOL;
358  newexpr[i++] = sep_power;
359  newexpr[i++] = 1;
360  newexpr[i++] = 1;
361  newexpr[i++] = 3;
362  newexpr[i++] = 0;
363  for (t=optimize_expr; *t!=0; t+=*t) {}
364  if ( t-optimize_expr+1 < i ) { // We have to redo this
365  M_free(optimize_expr,"$-sort space");
366  optimize_expr = (WORD *)Malloc1(i*sizeof(WORD),"$-sort space");
367  }
368  }
369  else {
370  newexpr[i++] = 0;
371  }
372  memcpy(optimize_expr, newexpr, i*sizeof(WORD));
373  M_free(newexpr,"optimize newexpr");
374 
375  // if factorized, replace SEP by FAC and remove brackets
376  if (brackets[0].size()>0 && brackets[0][2]==FACTORSYMBOL) {
377  for (WORD *t=optimize_expr; *t!=0; t+=*t) {
378  if (*t == ABS(*(t+*t-1))+1) continue;
379  if (t[1]==SYMBOL)
380  for (int i=3; i<t[2]; i+=2)
381  if (t[i]==SEPARATESYMBOL) t[i]=FACTORSYMBOL;
382  }
383  return vector<vector<WORD> >();
384  }
385  }
386 
387  return brackets;
388 }
389 
390 /*
391  #] get_brackets :
392  #[ count_operators :
393 */
394 
401 int count_operators (const WORD *expr, bool print=false) {
402 
403  int n=0;
404  while (*(expr+n)!=0) n+=*(expr+n);
405 
406  int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
407  WORD maxpowfac=1, maxpowsep=1;
408 
409  for (const WORD *t=expr; *t!=0; t+=*t) {
410  if (t!=expr) cntadd++; // new term
411  if (*t==ABS(*(t+*t-1))+1) continue; // only coefficient
412 
413  int cntsym=0;
414 
415  if (t[1]==SYMBOL)
416  for (int i=3; i<t[2]; i+=2) {
417  if (t[i]==FACTORSYMBOL) {
418  maxpowfac = max(maxpowfac, t[i+1]);
419  continue;
420  }
421  if (t[i]==SEPARATESYMBOL) {
422  maxpowsep = max(maxpowsep, t[i+1]);
423  continue;
424  }
425  if (t[i+1]>2) { // (extra)symbol power>2
426  cntpow++;
427  sumpow += (int)floor(log(t[i+1])/log(2.0)) + popcount(t[i+1]) - 1;
428  }
429  if (t[i+1]==2) cntmul++; // (extra)symbol squared
430  cntsym++;
431  }
432 
433  if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntsym++; // non +/-1 coefficient
434 
435  if (cntsym > 0) cntmul+=cntsym-1;
436  }
437 
438  cntadd -= maxpowfac-1;
439  cntmul += maxpowfac-1;
440 
441  cntadd -= maxpowsep-1;
442 
443  if (print)
444  MesPrint ("*** STATS: original %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
445 
446  return sumpow+cntmul+cntadd;
447 }
448 
455 int count_operators (const vector<WORD> &instr, bool print=false) {
456 
457  int cntpow=0, cntmul=0, cntadd=0, sumpow=0;
458 
459  const WORD *ebegin = &*instr.begin();
460  const WORD *eend = ebegin+instr.size();
461 
462  for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
463  for (const WORD *t=e+3; *t!=0; t+=*t) {
464  if (t!=e+3) {
465  if (*(e+1)==OPER_ADD) cntadd++; // new term
466  if (*(e+1)==OPER_MUL) cntmul++; // new term
467  }
468  if (*t == ABS(*(t+*t-1))+1) continue; // only coefficient
469  if (*(t+1)==SYMBOL || *(t+1)==EXTRASYMBOL) {
470  if (*(t+4)==2) cntmul++; // (extra)symbol squared
471  if (*(t+4)>2) { // (extra)symbol power>2
472  cntpow++;
473  sumpow += (int)floor(log(*(t+4))/log(2.0)) + popcount(*(t+4)) - 1;
474  }
475  }
476  if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) cntmul++; // non +/-1 coefficient
477  }
478  }
479 
480  if (print)
481  MesPrint ("*** STATS: optimized %lP %lM %lA : %l", cntpow,cntmul,cntadd,sumpow+cntmul+cntadd);
482 
483  return sumpow+cntmul+cntadd;
484 }
485 
486 /*
487  #] count_operators :
488  #[ occurrence_order :
489 */
490 
498 vector<WORD> occurrence_order (const WORD *expr, bool rev) {
499 
500  // count the number of occurrences of variables
501  map<WORD,int> cnt;
502  for (const WORD *t=expr; *t!=0; t+=*t) {
503  if (*t == ABS(*(t+*t-1))+1) continue; // skip constant term
504  if (t[1] == SYMBOL)
505  for (int i=3; i<t[2]; i+=2)
506  cnt[t[i]]++;
507  }
508 
509  bool is_fac=false, is_sep=false;
510  if (cnt.count(FACTORSYMBOL)) {
511  is_fac=true;
512  cnt.erase(FACTORSYMBOL);
513  }
514  if (cnt.count(SEPARATESYMBOL)) {
515  is_sep=true;
516  cnt.erase(SEPARATESYMBOL);
517  }
518 
519  // determine the order of the variables
520  vector<pair<int,WORD> > cnt_order;
521  for (map<WORD,int>::iterator i=cnt.begin(); i!=cnt.end(); i++)
522  cnt_order.push_back(make_pair(i->second, i->first));
523  sort(cnt_order.rbegin(), cnt_order.rend());
524 
525  // create resulting order
526  vector<WORD> order;
527  for (int i=0; i<(int)cnt_order.size(); i++)
528  order.push_back(cnt_order[i].second);
529 
530  if (rev) reverse(order.begin(),order.end());
531 
532  // add FACTORSYMBOL/SEPARATESYMBOL
533  if (is_fac) order.insert(order.begin(), FACTORSYMBOL);
534  if (is_sep) order.insert(order.begin(), SEPARATESYMBOL);
535 
536  return order;
537 }
538 
539 /*
540  #] occurrence_order :
541  #[ Horner_tree :
542 */
543 
579 WORD getpower (const WORD *term, int var, int pos) {
580  if (*term == ABS(*(term+*term-1))+1) return 0; // constant term
581  if (2*pos+2 >= term[2]) return 0; // too few symbols
582  if (term[2*pos+3] != var) return 0; // incorrect symbol
583  return term[2*pos+4]; // return power
584 }
585 
593 void fixarg (UWORD *t, WORD &n) {
594  int an=ABS(n), sn=SGN(n);
595  while (*(t+an-1)==0) an--;
596  n=an*sn;
597 }
598 
599 void GcdLong_fix_args (PHEAD UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc) {
600  fixarg(a,na);
601  fixarg(b,nb);
602  GcdLong(BHEAD a,na,b,nb,c,nc);
603 }
604 
605 void DivLong_fix_args(UWORD *a, WORD na, UWORD *b, WORD nb, UWORD *c, WORD *nc, UWORD *d, WORD *nd) {
606  fixarg(a,na);
607  fixarg(b,nb);
608  DivLong(a,na,b,nb,c,nc,d,nd);
609 }
610 
631 void build_Horner_tree (const WORD **terms, int numterms, int var, int maxvar, int pos, vector<WORD> *res) {
632 
633  GETIDENTITY;
634 
635  if (var == maxvar) {
636  // Horner tree is finished, so add remaining terms unfactorized
637  // (note: since only complete Horner schemes seem to be useful, numterms=1 here
638 
639  for (int fr=0; fr<numterms; fr++) {
640 
641  bool empty = true;
642  const WORD *t = terms[fr];
643 
644  // add symbols
645  if (*t != ABS(*(t+*t-1))+1)
646  for (int i=3+2*pos; i<t[2]; i+=2) {
647  res->push_back(SYMBOL);
648  res->push_back(4);
649  res->push_back(t[i]);
650  res->push_back(t[i+1]);
651  if (!empty) res->push_back(OPER_MUL);
652  empty = false;
653  }
654 
655  // if empty, add a 1, since the result should look like "... coeff *"
656  if (empty) {
657  res->push_back(SNUMBER);
658  res->push_back(5);
659  res->push_back(1);
660  res->push_back(1);
661  res->push_back(3);
662  }
663 
664  // add coefficient
665  res->push_back(SNUMBER);
666  WORD n = ABS(*(t+*t-1));
667  res->push_back(n+2);
668  for (int i=0; i<n; i++)
669  res->push_back(*(t+*t-n+i));
670  res->push_back(OPER_MUL);
671 
672  if (fr>0) res->push_back(OPER_ADD);
673  }
674 
675  // result should end with gcd of the terms; right now this never
676  // triggers, but if one would allow for incomplete Horner schemes,
677  // one should extract the gcd here
678  if (numterms > 1) {
679  res->push_back(SNUMBER);
680  res->push_back(5);
681  res->push_back(1);
682  res->push_back(1);
683  res->push_back(3);
684  res->push_back(OPER_MUL);
685  }
686  }
687  else {
688  // extract variable "var" and the gcd and proceed recursively
689 
690  WORD nnum = 0, nden = 0, ntmp = 0, ndum = 0;
691  UWORD *num = NumberMalloc("build_Horner_tree");
692  UWORD *den = NumberMalloc("build_Horner_tree");
693  UWORD *tmp = NumberMalloc("build_Horner_tree");
694  UWORD *dum = NumberMalloc("build_Horner_tree");
695 
696  // previous coefficient for gcd extraction or coefficient multiplication
697  int prev_coeff_idx = -1;
698 
699  for (int fr=0; fr<numterms;) {
700 
701  // find power of current term
702  WORD pow = getpower(terms[fr], var, pos);
703 
704  // find all terms with that power
705  int to=fr+1;
706  while (to<numterms && getpower(terms[to], var, pos) == pow) to++;
707 
708  // recursively build Horner tree of all terms proportional to var^pow
709  build_Horner_tree (terms+fr, to-fr, var+1, maxvar, pow==0?pos:pos+1, res);
710 
711  if (AN.poly_vars[var] != FACTORSYMBOL && AN.poly_vars[var] != SEPARATESYMBOL) {
712  // if normal symbol, find gcd(numerators) and gcd(denominators)
713  WORD n1 = res->at(res->size()-2) / 2;
714  WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
715 
716  WORD *t2 = fr==0 ? t1 : &res->at(prev_coeff_idx);
717  WORD n2 = fr==0 ? n1 : *(t2+*(t2+1)-1) / 2;
718  if (fr>0) t2+=2;
719 
720  GcdLong_fix_args(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,num,&nnum);
721  GcdLong_fix_args(BHEAD (UWORD *)t1+ABS(n1),ABS(n1),(UWORD *)t2+ABS(n2),ABS(n2),den,&nden);
722 
723  // divide out gcds; note: leading zeroes can be added here
724  for (int i=0; i<2; i++) {
725  if (i==1 && fr==0) break;
726 
727  WORD *t = i==0 ? t1 : t2;
728  WORD n = i==0 ? n1 : n2;
729 
730  DivLong_fix_args((UWORD *)t, n, num, nnum, tmp, &ntmp, dum, &ndum);
731  for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
732  for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
733 
734  if (SGN(ntmp) != SGN(n)) n=-n;
735 
736  DivLong_fix_args((UWORD *)t, n, den, nden, tmp, &ntmp, dum, &ndum);
737  for (int j=0; j<ABS(ntmp); j++) *t++ = tmp[j];
738  for (int j=0; j<ABS(n)-ABS(ntmp); j++) *t++ = 0;
739 
740  *t++ = SGN(n) * (2*ABS(n)+1);
741  }
742 
743  // add the addition operator
744  if (fr>0) res->push_back(OPER_ADD);
745 
746  // add the power of "var"
747  WORD nextpow = (to==numterms ? 0 : getpower(terms[to], var, pos));
748 
749  if (pow-nextpow > 0) {
750  res->push_back(SYMBOL);
751  res->push_back(4);
752  res->push_back(var);
753  res->push_back(pow-nextpow);
754  res->push_back(OPER_MUL);
755  }
756 
757  // add the extracted gcd
758  res->push_back(SNUMBER);
759  WORD n = MaX(ABS(nnum),nden);
760  res->push_back(n*2+3);
761  for (int i=0; i<ABS(nnum); i++) res->push_back(num[i]);
762  for (int i=0; i<n-ABS(nnum); i++) res->push_back(0);
763  for (int i=0; i<nden; i++) res->push_back(den[i]);
764  for (int i=0; i<n-ABS(nden); i++) res->push_back(0);
765  res->push_back(SGN(nnum)*(2*n+1));
766  res->push_back(OPER_MUL);
767 
768  prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
769  }
770  else if (AN.poly_vars[var]==FACTORSYMBOL) {
771 
772  // if factorsymbol, multiply overall integer contents
773 
774  if (fr>0) {
775  WORD n1 = res->at(res->size()-2) / 2;
776  WORD *t1 = &res->at(res->size()-2-2*ABS(n1));
777  WORD *t2 = &res->at(prev_coeff_idx);
778  WORD n2 = *(t2+*(t2+1)-1) / 2;
779  t2+=2;
780 
781  MulRat(BHEAD (UWORD *)t1,n1,(UWORD *)t2,n2,tmp,&ntmp);
782 
783  // replace previous coefficient with 1
784  n2=ABS(n2);
785  for (int i=0; i<ABS(n2); i++)
786  t2[i] = t2[n2+i] = i==0 ? 1 : 0;
787  t2[2*n2] = 2*n2+1;
788 
789  // remove this coefficient
790  for (int i=0; i<2*ABS(n1)+2; i++)
791  res->pop_back();
792 
793  // add product
794  res->back() = 2*ABS(ntmp)+3; // adjust size of term
795  res->insert(res->end(), tmp, tmp+2*ABS(ntmp)); // num/den coefficient
796  res->push_back(SGN(ntmp)*(2*ABS(ntmp)+1)); // size of coefficient
797  res->push_back(OPER_MUL); // operator
798  }
799 
800  prev_coeff_idx = res->size() - ABS(res->at(res->size()-2)) - 3;
801 
802  // multiply previous factors with this factor
803  if (fr>0)
804  res->push_back(OPER_MUL);
805  }
806  else { // AN.poly_vars[var]==SEPARATESYMBOL
807  if (fr>0)
808  res->push_back(OPER_COMMA);
809  prev_coeff_idx = -1;
810  }
811 
812  fr=to;
813  }
814 
815  // cleanup
816  NumberFree(dum,"build_Horner_tree");
817  NumberFree(tmp,"build_Horner_tree");
818  NumberFree(den,"build_Horner_tree");
819  NumberFree(num,"build_Horner_tree");
820  }
821 }
822 
831 bool term_compare (const WORD *a, const WORD *b) {
832  if (*a == ABS(*(a+*a-1))+1) return true; // coefficient comes first
833  if (*b == ABS(*(b+*b-1))+1) return false;
834  if (a[1]!=SYMBOL) return true;
835  if (b[1]!=SYMBOL) return false;
836  for (int i=3; i<a[2] && i<b[2]; i+=2) {
837  if (a[i] !=b[i] ) return a[i] >b[i];
838  if (a[i+1]!=b[i+1]) return a[i+1]<b[i+1];
839  }
840  return a[2]<b[2];
841 }
842 
852 vector<WORD> Horner_tree (const WORD *expr, const vector<WORD> &order) {
853 #ifdef DEBUG
854  MesPrint ("*** [%s, w=%w] CALL: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
855 #endif
856 
857  GETIDENTITY;
858 
859  // find the renumbering scheme (new numbers are 0,1,...,#vars-1)
860  map<WORD,WORD> renum;
861  AN.poly_num_vars = order.size();
862  AN.poly_vars = (WORD *)Malloc1(AN.poly_num_vars*sizeof(WORD), "AN.poly_vars");
863  for (int i=0; i<AN.poly_num_vars; i++) {
864  AN.poly_vars[i] = order[i];
865  renum[order[i]] = i;
866  }
867 
868  // sort variables in individual terms using bubble sort
869  WORD *sorted = AT.WorkPointer;
870 
871  for (const WORD *t=expr; *t!=0; t+=*t) {
872  memcpy(sorted, t, *t*sizeof(WORD));
873 
874  if (*t != ABS(*(t+*t-1))+1) {
875  // non-constant term
876 
877  // renumber variables, adding new variables if necessary
878  for (int i=3; i<sorted[2]; i+=2) {
879  if (!renum.count(sorted[i])) {
880  renum[sorted[i]] = AN.poly_num_vars;
881 
882  WORD *new_poly_vars = (WORD *)Malloc1((AN.poly_num_vars+1)*sizeof(WORD), "AN.poly_vars");
883  memcpy(new_poly_vars, AN.poly_vars, AN.poly_num_vars*sizeof(WORD));
884  M_free(AN.poly_vars,"poly_vars");
885  AN.poly_vars = new_poly_vars;
886  AN.poly_vars[AN.poly_num_vars] = sorted[i];
887  AN.poly_num_vars++;
888  }
889  sorted[i] = renum[sorted[i]];
890  }
891  // order variables
892  for (int i=0; i<sorted[2]/2; i++)
893  for (int j=3; j+2<sorted[2]; j+=2)
894  if (sorted[j] > sorted[j+2]) {
895  swap(sorted[j] , sorted[j+2]);
896  swap(sorted[j+1], sorted[j+3]);
897  }
898  }
899 
900  sorted += *sorted;
901  }
902 
903  *sorted = 0;
904  sorted = AT.WorkPointer;
905 
906  // find pointers to all terms and sort them efficiently
907  vector<const WORD *> terms;
908  for (const WORD *t=sorted; *t!=0; t+=*t)
909  terms.push_back(t);
910  sort(terms.rbegin(),terms.rend(),term_compare);
911 
912  // construct the Horner tree
913  vector<WORD> res;
914  build_Horner_tree(&terms[0], terms.size(), 0, AN.poly_num_vars, 0, &res);
915 
916  // remove leading zeroes in coefficients
917  int j=0;
918  for (int i=0; i<(int)res.size();) {
919  if (res[i]==OPER_ADD || res[i]==OPER_MUL || res[i]==OPER_COMMA)
920  res[j++] = res[i++];
921  else if (res[i]==SYMBOL) {
922  memmove(&res[j], &res[i], res[i+1]*sizeof(WORD));
923  i+=res[j+1];
924  j+=res[j+1];
925  }
926  else if (res[i]==SNUMBER) {
927  int n = (res[i+1]-2)/2;
928  int dn = 0;
929  while (res[i+1+n-dn]==0 && res[i+1+2*n-dn]==0) dn++;
930  res[j++] = SNUMBER;
931  res[j++] = 2*(n-dn) + 3;
932  memmove(&res[j], &res[i+2], (n-dn)*sizeof(WORD));
933  j+=n-dn;
934  memmove(&res[j], &res[i+n+2], (n-dn)*sizeof(WORD));
935  j+=n-dn;
936  res[j++] = SGN(res[i+2*n+2])*(2*(n-dn)+1);
937  i+=2*n+3;
938  }
939  }
940  res.resize(j);
941 
942 #ifdef DEBUG
943  MesPrint ("*** [%s, w=%w] DONE: Horner_tree(%a)", thetime_str().c_str(), order.size(), &order[0]);
944 #endif
945 
946  return res;
947 }
948 
949 /*
950  #] Horner_tree :
951  #[ print_tree :
952 */
953 
954 // print Horner tree (for debugging)
955 void print_tree (const vector<WORD> &tree) {
956 
957  GETIDENTITY;
958 
959  for (int i=0; i<(int)tree.size();) {
960  if (tree[i]==OPER_ADD) {
961  MesPrint("+%");
962  i++;
963  }
964  else if (tree[i]==OPER_MUL) {
965  MesPrint("*%");
966  i++;
967  }
968  else if (tree[i]==OPER_COMMA) {
969  MesPrint(",%");
970  i++;
971  }
972  else if (tree[i]==SNUMBER) {
973  UBYTE buf[100];
974  int n = tree[i+tree[i+1]-1]/2;
975  PrtLong((UWORD *)&tree[i+2], n, buf);
976  int l = strlen((char *)buf);
977  buf[l]='/';
978  n=ABS(n);
979  PrtLong((UWORD *)&tree[i+2+n], n, buf+l+1);
980  MesPrint("%s%",buf);
981  i+=tree[i+1];
982  }
983  else if (tree[i]==SYMBOL) {
984  if (AN.poly_vars[tree[i+2]] < 10000)
985  MesPrint("%s^%d%", VARNAME(symbols,AN.poly_vars[tree[i+2]]), tree[i+3]);
986  else
987  MesPrint("Z%d^%d%", MAXVARIABLES-AN.poly_vars[tree[i+2]], tree[i+3]);
988  i+=tree[i+1];
989  }
990  else {
991  MesPrint("error");
992  exit(1);
993  }
994 
995  MesPrint(" %");
996  }
997 
998  MesPrint("");
999 }
1000 
1001 /*
1002  #] print_tree :
1003  #[ generate_instructions :
1004 */
1005 
1006 
1007 struct CSEHash {
1008  size_t operator()(const vector<WORD>& n) const {
1009  return n[0];
1010  }
1011 };
1012 
1013 struct CSEEq {
1014  bool operator()(const vector<WORD>& lhs, const vector<WORD>& rhs) const {
1015  if (lhs.size() != rhs.size()) return false;
1016  for (unsigned int i = 1; i < lhs.size(); i++) {
1017  if (lhs[i] != rhs[i]) return false;
1018  }
1019  return true;
1020  }
1021 };
1022 
1023 
1024 template<typename T> size_t hash_range(T* array, int size) {
1025  size_t hash = 0;
1026 
1027  for (int i = 0; i < size; i++) {
1028  hash ^= array[i] + 0x9e3779b9 + (hash << 6) + (hash >> 2);
1029  }
1030 
1031  return hash;
1032 }
1033 
1086 vector<WORD> generate_instructions (const vector<WORD> &tree, bool do_CSE) {
1087 #ifdef DEBUG
1088  MesPrint ("*** [%s, w=%w] CALL: generate_instructions(cse=%d)",
1089  thetime_str().c_str(), do_CSE?1:0);
1090 #endif
1091 
1092  typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
1093  csemap ID;
1094 
1095  // reserve lots of space, to prevent later rehashes
1096  // TODO: what if this is too large? make a parameter?
1097  if (do_CSE) {
1098  ID.rehash(mcts_expr_score * 2);
1099  }
1100 
1101  // s is a stack of operands to process when you encounter operators
1102  // in the postfix expression tree. Operands consist of three WORDs,
1103  // formatted as the LHS/RHS of the keys in ID.
1104  stack<int> s;
1105  vector<WORD> instr;
1106  WORD numinstr = 0;
1107  vector<WORD> x;
1108 
1109  // process the expression tree
1110  for (int i=0; i<(int)tree.size();) {
1111  x.clear();
1112 
1113  if (tree[i]==SNUMBER) {
1114  WORD hash = hash_range(&tree[i], tree[i + 1]);
1115  x.push_back(hash);
1116  x.push_back(SNUMBER);
1117  x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
1118  int sign = SGN(x.back());
1119  x.back() *= sign;
1120 
1121  std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1122  s.push(0);
1123  s.push(sign * suc.first->second);
1124  s.push(SNUMBER);
1125  s.push(hash);
1126  i+=tree[i+1];
1127  }
1128  else if (tree[i]==SYMBOL) {
1129  WORD hash = hash_range(&tree[i], tree[i + 1]);
1130  if (tree[i+3]>1) {
1131  x.push_back(hash);
1132  x.push_back(SYMBOL);
1133  x.push_back(tree[i+2]+1); // variable (1-indexed)
1134  x.push_back(tree[i+3]); // power
1135 
1136  csemap::iterator it = ID.find(x);
1137  if (do_CSE && it != ID.end()) {
1138  // already-seen power of a symbol
1139  s.push(1);
1140  s.push(it->second);
1141  s.push(EXTRASYMBOL);
1142  } else {
1143  //MesPrint("strange");
1144  if (numinstr == MAXPOSITIVE) {
1145  MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1146  Terminate(-1);
1147  }
1148 
1149  // new power greater than 1 of a symbol
1150  instr.push_back(numinstr); // expr.nr
1151  instr.push_back(OPER_MUL); // operator
1152  instr.push_back(9+OPTHEAD); // length total
1153  instr.push_back(8); // length operand
1154  instr.push_back(SYMBOL); // SYMBOL
1155  instr.push_back(4); // length symbol
1156  instr.push_back(tree[i+2]); // variable
1157  instr.push_back(tree[i+3]); // power
1158  instr.push_back(1); // numerator
1159  instr.push_back(1); // denominator
1160  instr.push_back(3); // length coeff
1161  instr.push_back(0); // trailing 0
1162 
1163  ID[x] = ++numinstr;
1164  s.push(1);
1165  s.push(numinstr);
1166  s.push(EXTRASYMBOL);
1167  }
1168  }
1169  else {
1170  // power of 1
1171  s.push(tree[i+3]); // power
1172  s.push(tree[i+2]+1); // variable (1-indexed)
1173  s.push(SYMBOL);
1174  }
1175 
1176  s.push(hash); // push hash
1177  i+=tree[i+1];
1178  }
1179  else { // tree[i]==OPERATOR
1180  int oper = tree[i];
1181  i++;
1182 
1183  x.push_back(0); // placeholder for hash
1184  x.push_back(oper);
1185  vector<WORD> hash;
1186  hash.push_back(oper);
1187 
1188  // get two operands from the stack
1189  for (int operand=0; operand<2; operand++) {
1190  hash.push_back(s.top()); s.pop();
1191  x.push_back(s.top()); s.pop();
1192  x.push_back(s.top()); s.pop();
1193  x.push_back(s.top()); s.pop();
1194  }
1195 
1196  x[0] = hash_range(&hash[0], 3);
1197 
1198  // get rid of multiplications by +/-1
1199  if (oper==OPER_MUL) {
1200  bool do_continue = false;
1201 
1202  for (int operand=0; operand<2; operand++) {
1203  int idx_oper1 = operand==0 ? 2 : 5;
1204  int idx_oper2 = operand==0 ? 5 : 2;
1205 
1206  // check whether operand 1 equals +/-1
1207  if (x[idx_oper1]==SNUMBER) {
1208 
1209  int idx = ABS(x[idx_oper1+1])-1;
1210  if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
1211  // push +/- other operand and continue
1212  s.push(x[idx_oper2+2]);
1213  s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1214  s.push(x[idx_oper2]);
1215  s.push(hash[1 + (operand + 1) % 2]);
1216  do_continue = true;
1217  break;
1218  }
1219  }
1220  }
1221 
1222  if (do_continue) continue;
1223  }
1224 
1225  // check whether this subexpression has been seen before
1226  // if not, generate instruction to define it
1227  csemap::iterator it = ID.find(x);
1228  if (!do_CSE || it == ID.end()) {
1229  if (numinstr == MAXPOSITIVE) {
1230  MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1231  Terminate(-1);
1232  }
1233 
1234  instr.push_back(numinstr); // expr.nr.
1235  instr.push_back(x[1]); // operator
1236  instr.push_back(3); // length
1237 
1238  ID[x] = ++numinstr;
1239 
1240  int lenidx = instr.size()-1;
1241 
1242  for (int j=0; j<2; j++)
1243  if (x[3*j+2]==SYMBOL || x[3*j+2]==EXTRASYMBOL) {
1244  instr.push_back(8); // length total
1245  instr.push_back(x[3*j+2]); // (extra)symbol
1246  instr.push_back(4); // length (extra)symbol
1247  instr.push_back(ABS(x[3*j+3])-1); // variable (0-indexed)
1248  instr.push_back(x[3*j+4]); // power
1249  instr.push_back(1); // numerator
1250  instr.push_back(1); // denominator
1251  instr.push_back(3*SGN(x[3*j+3])); // length coeff
1252  instr[lenidx] += 8;
1253  }
1254  else { // x[3*j+1]==SNUMBER
1255  int t = ABS(x[3*j+3])-1;
1256  instr.push_back(tree[t+1]-1); // length number
1257  instr.insert(instr.end(), &tree[t+2], &tree[t]+tree[t+1]); // digits
1258  instr.back() *= SGN(instr.back()) * SGN(x[3*j+3]);
1259  instr[lenidx] += tree[t+1]-1;
1260  }
1261 
1262  instr.push_back(0); // trailing 0
1263  instr[lenidx]++;
1264  }
1265 
1266  // push new expression on the stack
1267  s.push(1);
1268  s.push(ID[x]);
1269  s.push(EXTRASYMBOL);
1270  s.push(x[0]); // push hash
1271  }
1272  }
1273 
1274 #ifdef DEBUG
1275  MesPrint ("*** [%s, w=%w] DONE: generate_instructions(cse=%d) : numoper=%d",
1276  thetime_str().c_str(), do_CSE?1:0, count_operators(instr));
1277 #endif
1278 
1279  return instr;
1280 }
1281 
1282 /*
1283  #] generate_instructions :
1284  #[ count_operators_cse :
1285 */
1286 
1287 
1295 int count_operators_cse (const vector<WORD> &tree) {
1296  //MesPrint ("*** [%s] Starting CSEE", thetime_str().c_str());
1297 
1298  typedef unordered_map<vector<WORD>, int, CSEHash, CSEEq> csemap;
1299  csemap ID;
1300 
1301  // reserve lots of space, to prevent later rehashes
1302  // TODO: what if this is too large? make a parameter?
1303  ID.rehash(mcts_expr_score * 2);
1304 
1305  // s is a stack of operands to process when you encounter operators
1306  // in the postfix expression tree. Operands consist of three WORDs,
1307  // formatted as the LHS/RHS of the keys in ID.
1308  stack<int> s;
1309  int numinstr = 0, numcommas = 0;
1310  vector<WORD> x;
1311 
1312  // process the expression tree
1313  for (int i=0; i<(int)tree.size();) {
1314  x.clear();
1315 
1316  if (tree[i]==SNUMBER) {
1317  WORD hash = hash_range(&tree[i], tree[i + 1]);
1318  x.push_back(hash);
1319  x.push_back(SNUMBER);
1320  x.insert(x.end(),&tree[i],&tree[i]+tree[i+1]);
1321  int sign = SGN(x.back());
1322  x.back() *= sign;
1323 
1324  std::pair<csemap::iterator, bool> suc = ID.insert(csemap::value_type(x, i + 1));
1325  s.push(0);
1326  s.push(sign * suc.first->second);
1327  s.push(SNUMBER);
1328  s.push(hash);
1329  i+=tree[i+1];
1330  }
1331  else if (tree[i]==SYMBOL) {
1332  WORD hash = hash_range(&tree[i], tree[i + 1]);
1333  if (tree[i+3]>1) {
1334  x.push_back(hash);
1335  x.push_back(SYMBOL);
1336  x.push_back(tree[i+2]+1); // variable (1-indexed)
1337  x.push_back(tree[i+3]); // power
1338 
1339  csemap::iterator it = ID.find(x);
1340  if (it != ID.end()) {
1341  // already-seen power of a symbol
1342  s.push(1);
1343  s.push(it->second);
1344  s.push(EXTRASYMBOL);
1345  } else {
1346  if (tree[i + 3] == 2)
1347  numinstr++;
1348  else
1349  numinstr += (int)floor(log(tree[i+3])/log(2.0)) + popcount(tree[i+3]) - 1;
1350 
1351  ID[x] = numinstr;
1352  s.push(1);
1353  s.push(numinstr);
1354  s.push(EXTRASYMBOL);
1355  }
1356  }
1357  else {
1358  // power of 1
1359  s.push(tree[i+3]); // power
1360  s.push(tree[i+2]+1); // variable (1-indexed)
1361  s.push(SYMBOL);
1362  }
1363 
1364  s.push(hash); // push hash
1365 
1366  i+=tree[i+1];
1367  }
1368  else { // tree[i]==OPERATOR
1369  int oper = tree[i];
1370  i++;
1371 
1372  x.push_back(0); // placeholder for hash
1373  x.push_back(oper);
1374  vector<WORD> hash;
1375  hash.push_back(oper);
1376 
1377  // get two operands from the stack
1378  for (int operand=0; operand<2; operand++) {
1379  hash.push_back(s.top()); s.pop();
1380  x.push_back(s.top()); s.pop();
1381  x.push_back(s.top()); s.pop();
1382  x.push_back(s.top()); s.pop();
1383  }
1384 
1385 
1386  x[0] = hash_range(&hash[0], 3);
1387 
1388  // get rid of multiplications by +/-1
1389  if (oper==OPER_MUL) {
1390  bool do_continue = false;
1391 
1392  for (int operand=0; operand<2; operand++) {
1393  int idx_oper1 = operand==0 ? 2 : 5;
1394  int idx_oper2 = operand==0 ? 5 : 2;
1395 
1396  // check whether operand 1 equals +/-1
1397  if (x[idx_oper1]==SNUMBER) {
1398 
1399  int idx = ABS(x[idx_oper1+1])-1;
1400  if (tree[idx+2]==1 && tree[idx+3]==1 && ABS(tree[idx+4])==3) {
1401  // push +/- other operand and continue
1402  s.push(x[idx_oper2+2]);
1403  s.push(x[idx_oper2+1]*SGN(x[idx_oper1+1]));
1404  s.push(x[idx_oper2]);
1405  s.push(hash[1 + (operand + 1) % 2]);
1406  do_continue = true;
1407  break;
1408  }
1409  }
1410  }
1411 
1412  if (do_continue) continue;
1413  }
1414 
1415  // check whether this subexpression has been seen before
1416  // if not, generate instruction to define it
1417  csemap::iterator it = ID.find(x);
1418  if (it == ID.end()) {
1419  if (numinstr == MAXPOSITIVE) {
1420  MesPrint((char *)"ERROR: too many temporary variables needed in optimization");
1421  Terminate(-1);
1422  }
1423 
1424  if (oper == OPER_COMMA) numcommas++;
1425  ID[x] = ++numinstr;
1426  s.push(1);
1427  s.push(numinstr);
1428  s.push(EXTRASYMBOL);
1429  } else {
1430  // push new expression on the stack
1431  s.push(1);
1432  s.push(it->second);
1433  s.push(EXTRASYMBOL);
1434  }
1435 
1436  s.push(x[0]); // push hash
1437  }
1438  }
1439 
1440  //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1441  return numinstr - numcommas;
1442 }
1443 
1444 /*
1445  #] count_operators_cse :
1446  #[ count_operators_cse_topdown :
1447 */
1448 
1449 typedef struct node {
1450  const WORD* data;
1451  struct node* l;
1452  struct node* r; // TODO: add l,r to data?
1453  WORD sign; // TODO: use data for this?
1454  UWORD hash;
1455 
1456  node() : l(NULL), r(NULL), sign(1), hash(0) {};
1457  node(const WORD* data) : data(data), l(NULL), r(NULL), sign(1), hash(0) {};
1458 
1459  // a minus sign in the tree should only count as a different entry if
1460  // it is a compound expression: a = -a, but T+-V != T+V
1461  int cmp(const struct node* rhs) const {
1462  if (this == rhs) return 0;
1463 
1464  if (data[0] != rhs->data[0]) return data[0] < rhs->data[0] ? -1 : 1;
1465  int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
1466  if (data[0] == SYMBOL || data[0] == SNUMBER) {
1467  for (int i = 0; i < data[1] + mod; i++) {
1468  if (data[i] != rhs->data[i]) return data[i] < rhs->data[i] ? -1 : 1;
1469  }
1470  } else {
1471  int lv = l->cmp(rhs->l);
1472  if (lv != 0) return lv;
1473  int rv = r->cmp(rhs->r);
1474  if (rv != 0) return rv;
1475 
1476  // TODO: only for ADD operation
1477  if (l->sign != rhs->l->sign) return l->sign < rhs->l->sign ? -1 : 1;
1478  if (r->sign != rhs->r->sign) return r->sign < rhs->r->sign ? -1 : 1;
1479  }
1480 
1481  return 0;
1482  }
1483 
1484  // less than operator
1485  bool operator() (const struct node* lhs, const struct node* rhs) const
1486  {
1487  return lhs->cmp(rhs) < 0;
1488  }
1489 
1490  void calcHash() {
1491  int mod = data[0] == SNUMBER ? -1 : 0; // don't check sign, for numbers
1492  if (data[0] == SYMBOL || data[0] == SNUMBER) {
1493  hash = hash_range(data, data[1] + mod);
1494  } else {
1495  if (l->hash == 0) l->calcHash();
1496  if (r->hash == 0) r->calcHash();
1497 
1498  // signs only matter for compound expressions
1499  size_t newr[] = {(size_t)data[0], l->hash, (size_t)l->sign, r->hash, (size_t)r->sign};
1500  hash = hash_range(newr, 5);
1501  }
1502  }
1503 } NODE;
1504 
1505 struct NodeHash {
1506  size_t operator()(const NODE* n) const {
1507  return n->hash; // already computed
1508  }
1509 };
1510 
1511 struct NodeEq {
1512  bool operator()(const NODE* lhs, const NODE* rhs) const {
1513  return lhs->cmp(rhs) == 0;
1514  }
1515 };
1516 
1517 NODE* buildTree(vector<WORD> &tree) {
1518  //MesPrint ("*** [%s] Starting CSEE topdown", thetime_str().c_str());
1519 
1520  // allocate spaces for the tree, cannot be more nodes than tree size
1521  NODE* ar = (NODE*)Malloc1(tree.size() * sizeof(NODE), "CSE tree");
1522  NODE* c = 0;
1523  unsigned int curIndex = 0;
1524 
1525  stack<NODE*> st;
1526  for (int i=0; i<(int)tree.size();) {
1527  c = ar + curIndex;
1528  new (c) NODE(&tree[i]); // placement new
1529  curIndex++;
1530 
1531  if (tree[i]==SYMBOL || tree[i] == SNUMBER) {
1532  // extract the sign to a new class member
1533  if (tree[i] == SNUMBER) {
1534  c->sign = SGN(tree[i + tree[i + 1] -1]);
1535  }
1536 
1537  c->calcHash();
1538  st.push(c);
1539  i+=tree[i+1];
1540  } else {
1541  c->r = st.top(); st.pop();
1542  c->l = st.top(); st.pop();
1543 
1544  // filter *1 and *-1
1545  // TODO: also multiply if there are two numbers?
1546  if (c->data[0] == OPER_MUL) {
1547  NODE* ch[] = {c->r, c->l};
1548  for (int j = 0; j < 2; j++)
1549  if (ch[j]->data[0] == SNUMBER && ch[j]->data[1] == 5 && ch[j]->data[2]==1 && ch[j]->data[3]==1) {
1550  ch[(j+1)%2]->sign *= ch[j]->sign; // transfer sign
1551  c = ch[(j+1)%2];
1552  break;
1553  }
1554  }
1555 
1556  c->calcHash();
1557  st.push(c);
1558  i++;
1559  }
1560  }
1561 
1562  // TODO: reallocate to smaller size? Could save memory
1563  //MesPrint("Memory difference: %d vs %d", curIndex, tree.size());
1564 
1565  // we want to make the root of the tree the first element
1566  // so that we can easily free the array.
1567  // we swap the first element with the root
1568  // we need to change the pointer in the operator node that has this element as a child
1569  // TODO: check performance
1570  for (unsigned int i = 0; i < curIndex; i++) {
1571  if (ar[i].l == ar) ar[i].l = st.top();
1572  if (ar[i].r == ar) ar[i].r = st.top();
1573  }
1574 
1575  swap(ar[0], *st.top());
1576  return ar;
1577 }
1578 
1579 int count_operators_cse_topdown (vector<WORD> &tree) {
1580  typedef unordered_set<NODE*, NodeHash, NodeEq> nodeset;
1581  nodeset ID;
1582 
1583  // reserve lots of space, to prevent later rehashes
1584  // TODO: what if this is too large? make a parameter?
1585  ID.rehash(mcts_expr_score * 2);
1586 
1587  int numinstr = 0;
1588 
1589  NODE* root = buildTree(tree);
1590 
1591  stack<NODE*> stack;
1592  stack.push(root);
1593  while (!stack.empty())
1594  {
1595  NODE* c = stack.top();
1596  stack.pop();
1597 
1598  if (c->data[0] == SYMBOL) {
1599  if (c->data[3] > 1) {
1600  std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1601  if (suc.second) { // new
1602  if (c->data[3] == 2)
1603  numinstr++;
1604  else
1605  numinstr += (int)floor(log(c->data[3])/log(2.0)) + popcount(c->data[3]) - 1;
1606  }
1607  }
1608  } else {
1609  if (c->data[0] != SNUMBER) {
1610  // operator
1611  std::pair<nodeset::iterator, bool> suc = ID.insert(c);
1612  if (suc.second) {
1613  stack.push(c->r);
1614  stack.push(c->l);
1615 
1616  // ignore OPER_COMMA
1617  if (c->data[0] == OPER_MUL || c->data[0] == OPER_ADD)
1618  numinstr++;
1619  }
1620  }
1621  }
1622  }
1623 
1624  //MesPrint ("*** [%s] Stopping CSEE", thetime_str().c_str());
1625  M_free(root, "CSE tree");
1626 
1627  return numinstr;
1628 }
1629 
1630 /*
1631  #] count_operators_cse_topdown :
1632  #[ simulated_annealing :
1633 */
1634 vector<WORD> simulated_annealing() {
1635  float minT = AO.Optimize.saMinT.fval;
1636  float maxT = AO.Optimize.saMaxT.fval;
1637  float T = maxT;
1638  float coolrate = pow(minT / maxT, 1 / (float)AO.Optimize.saIter);
1639 
1640  GETIDENTITY;
1641 
1642  // create a valid state where FACTORSYMBOL/SEPARATESYMBOL remains first
1643  vector<WORD> state = occurrence_order(optimize_expr, false);
1644  int startindex = 0;
1645  if ((state.size() > 0 && state[0] == SEPARATESYMBOL) || (state.size() > 1 && state[1] == FACTORSYMBOL)) startindex++;
1646  if (state.size() > 1 && state[1] == FACTORSYMBOL) startindex++;
1647 
1648  my_random_shuffle(BHEAD state.begin() + startindex, state.end()); // start from random scheme
1649 
1650  vector<WORD> tree = Horner_tree(optimize_expr, state);
1651  int curscore = count_operators_cse_topdown(tree);
1652 
1653  // clean poly_vars, that are allocated by Horner_tree
1654  AN.poly_num_vars = 0;
1655  M_free(AN.poly_vars,"poly_vars");
1656 
1657  std::vector<WORD> best = state; // best state
1658  int bestscore = curscore;
1659 
1660  if (startindex == (int)state.size() || (int)state.size() - startindex < 2) {
1661  return best;
1662  }
1663 
1664  for (int o = 0; o < AO.Optimize.saIter; o++) {
1665  int inda = iranf(BHEAD state.size() - startindex) + startindex;
1666  int indb = iranf(BHEAD state.size() - startindex) + startindex;
1667 
1668  if (inda == indb) {
1669  continue;
1670  }
1671 
1672  swap(state[inda], state[indb]); // swap works best for Horner
1673 
1674  vector<WORD> tree = Horner_tree(optimize_expr, state);
1675  int newscore = count_operators_cse_topdown(tree);
1676 
1677  // clean poly_vars, that are allocated by Horner_tree
1678  AN.poly_num_vars = 0;
1679  M_free(AN.poly_vars,"poly_vars");
1680 
1681  if (newscore <= curscore || 2.0 * wranf(BHEAD0) / (float)(UWORD)(-1) < exp((curscore - newscore) / T)) {
1682  curscore = newscore;
1683 
1684  if (curscore < bestscore) {
1685  bestscore = curscore;
1686  best = state;
1687  }
1688  } else {
1689  swap(state[inda], state[indb]);
1690  }
1691 
1692 #ifdef DEBUG_SA
1693  MesPrint("Score at step %d: %d", o, curscore);
1694 #endif
1695  T *= coolrate;
1696  }
1697 
1698 #ifdef DEBUG_SA
1699  MesPrint("Simulated annealing score: %d", bestscore);
1700 #endif
1701 
1702  return best;
1703 }
1704 
1705 /*
1706  #] simulated_annealing :
1707  #[ printpstree :
1708 */
1709 
1710 /*
1711 // print MCTS tree with LaTeX/pstricks (for analysis)
1712 void printpstree_rec (tree_node x, string pre="") {
1713 
1714  if (x.num_visits==1) {
1715  MesPrint("%s\\TR{%d}",pre.c_str(),x.var);
1716  }
1717  else {
1718  MesPrint("%s\\pstree%s{\\TR{%d}}{",pre.c_str(),
1719  pre==" "?"[nodesep=0, levelsep=40]":"",
1720  x.var);
1721  for (int i=0; i<(int)x.childs.size(); i++)
1722  if (x.childs[i].num_visits>0)
1723  printpstree_rec(x.childs[i], pre+" ");
1724  MesPrint("%s}",pre.c_str());
1725  }
1726 }
1727 
1728 void printpstree () {
1729  // draw tree with pstricks
1730  MesPrint ("\\documentclass{article}");
1731  MesPrint ("\\usepackage{pstricks,pst-node,pst-tree,graphicx}");
1732  MesPrint ("\\begin{document}");
1733  MesPrint ("\\scalebox{0.02}{");
1734  printpstree_rec(mcts_root," ");
1735  MesPrint ("}");
1736  MesPrint ("\\end{document}");
1737 }
1738 */
1739 
1740 /*
1741  #] printpstree :
1742  #[ find_Horner_MCTS_expand_tree :
1743 */
1744 
1776 /*
1777  #[ next_MCTS_scheme :
1778 */
1779 
1780 // find a Horner scheme to be used for the next simulation
1781 inline static void next_MCTS_scheme (PHEAD vector<WORD> *porder, vector<WORD> *pscheme, vector<tree_node *> *ppath) {
1782 
1783  vector<WORD> &order = *porder;
1784  vector<WORD> &schemev = *pscheme;
1785  vector<tree_node *> &path = *ppath;
1786  int depth = 0, nchild0;
1787  float slide_down_factor = 1.0;
1788 
1789  order.clear();
1790  path.clear();
1791 
1792  // MCTS step I: select
1793  tree_node *select = &mcts_root;
1794  path.push_back(select);
1795  nchild0 = select->childs.size();
1796  while (select->childs.size() > 0) {
1797  // add virtual loss
1798  select->num_visits++;
1799  select->sum_results+=mcts_expr_score;
1800 
1801 //-------------------------------------------------------------------
1802  switch ( AO.Optimize.mctsdecaymode ) {
1803  case 1: // Based on http://arxiv.org/abs/arXiv:1312.0841
1804  slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1805  break;
1806  case 2: // This gives a bit more cleanup time at the end.
1807  if ( 2*AT.optimtimes < AO.Optimize.mctsnumexpand ) {
1808  slide_down_factor = 1.0*(AO.Optimize.mctsnumexpand-2*AT.optimtimes);
1809  slide_down_factor /= 1.0*AO.Optimize.mctsnumexpand;
1810  }
1811  else {
1812  slide_down_factor = 0.0001;
1813  }
1814  break;
1815  case 3: // depth dependent factor combined with case 1
1816  float dd = 1.0-(1.0*depth)/(1.0*nchild0);
1817  slide_down_factor = 1.0-(1.0*AT.optimtimes)/(1.0*AO.Optimize.mctsnumexpand);
1818  if ( dd <= 0.000001 ) slide_down_factor = 1.0;
1819  else slide_down_factor /= dd;
1820  if ( slide_down_factor > 1.0 ) slide_down_factor = 1.0;
1821  break;
1822  }
1823 //-------------------------------------------------------------------
1824 
1825 #ifdef DEBUG_MCTS
1826  MesPrint("select %d",select->var);
1827 #endif
1828 
1829  // find most-promising node
1830  double best=0;
1831  tree_node *next=NULL;
1832  for (vector<tree_node>::iterator p=select->childs.begin(); p<select->childs.end(); p++) {
1833  double score;
1834  if (p->num_visits >= 1) {
1835 
1836  // there are results calculated, so select with the UCT formula
1837  score = mcts_expr_score / (p->sum_results/p->num_visits) +
1838 //-------------------------------------------------------------------------
1839  slide_down_factor *
1840 //-------------------------------------------------------------------------
1841  2 * AO.Optimize.mctsconstant.fval * sqrt(2*log(select->num_visits) / p->num_visits);
1842 
1843 #ifdef DEBUG_MCTS
1844  printf("%d: %.2lf [x=%.2lf n=%d fin=%i]\n",p->var,score,mcts_expr_score / (p->sum_results/p->num_visits),
1845  p->num_visits,p->finished?1:0);
1846  fflush(stdout);
1847 #endif
1848  }
1849  else {
1850  // no results yet, so select this node by setting score=infinite
1851  score = 1e100;
1852 
1853 #ifdef DEBUG_MCTS
1854  printf("%d: inf\n",p->var); fflush(stdout);
1855 #endif
1856  }
1857 
1858  // update best candidate
1859  if (!p->finished && score>best) {
1860  best=score;
1861  next=&*p;
1862  }
1863  }
1864 
1865  // if no node is found, this node is finished
1866  if (next==NULL) {
1867  select->finished=true;
1868  break;
1869  }
1870 
1871  // traverse down the tree
1872  select = next;
1873  path.push_back(select);
1874  order.push_back(select->var);
1875  depth++;
1876  }
1877 
1878  // MCTS step II: expand
1879 
1880 #ifdef DEBUG_MCTS
1881  MesPrint("expand %d",select->var);
1882 #endif
1883 
1884  // variables used so far
1885  set<WORD> var_used;
1886 
1887  for (int i=0; i<(int)order.size(); i++)
1888  var_used.insert(ABS(order[i])-1);
1889 
1890  // if this a new node, create node and add children
1891  if (!select->finished && select->childs.size()==0) {
1892  tree_node new_node(select->var);
1893  int sign = (order.size() == 0) ? 1 : SGN(order.back());
1894  for (int i=0; i<(int)mcts_vars.size(); i++)
1895  if (!var_used.count(mcts_vars[i])) {
1896  new_node.childs.push_back(tree_node(sign*(mcts_vars[i]+1)));
1897  if (AO.Optimize.hornerdirection==O_FORWARDANDBACKWARD)
1898  new_node.childs.push_back(tree_node(-sign*(mcts_vars[i]+1)));
1899  }
1900  my_random_shuffle(BHEAD new_node.childs.begin(), new_node.childs.end());
1901 
1902  // here locking is necessary, since operator=(tree_node) is a
1903  // non-atomic operation (using pointers makes this lock obsolete)
1904  LOCK(optimize_lock);
1905  *select = new_node;
1906  UNLOCK(optimize_lock);
1907  }
1908  // set finished if necessary
1909  if (select->childs.size()==0)
1910  select->finished = true;
1911 
1912  // add virtual loss of number of operators in original expression
1913  select->num_visits++;
1914  select->sum_results+=mcts_expr_score;
1915 
1916  // MCTS step III: simulation
1917 
1918  // create complete Horner scheme
1919  deque<WORD> scheme;
1920 
1921  for (int i=0; i<(int)mcts_vars.size(); i++)
1922  if (!var_used.count(mcts_vars[i]))
1923  scheme.push_back(mcts_vars[i]);
1924  my_random_shuffle(BHEAD scheme.begin(), scheme.end());
1925 
1926  for (int i=(int)order.size()-1; i>=0; i--) {
1927  if (order[i] > 0)
1928  scheme.push_front(order[i]-1);
1929  else
1930  scheme.push_back(-order[i]-1);
1931  }
1932 
1933  // add FACTORSYMBOL/SEPARATESYMBOL is necessary
1934  if (mcts_factorized)
1935  scheme.push_front(FACTORSYMBOL);
1936  if (mcts_separated)
1937  scheme.push_front(SEPARATESYMBOL);
1938 
1939  // Horner scheme as a vector
1940  schemev = vector<WORD>(scheme.begin(), scheme.end());
1941 
1942 }
1943 
1944 /*
1945  #] next_MCTS_scheme :
1946  #[ try_MCTS_scheme :
1947 */
1948 
1949 // count the number of operators in the given Horner scheme
1950 inline static void try_MCTS_scheme (PHEAD const vector<WORD> &scheme, int *pnum_oper) {
1951 
1952  // do Horner, CSE and count the number of operators
1953  vector<WORD> tree = Horner_tree(optimize_expr, scheme);
1954  //vector<WORD> instr = generate_instructions(tree, true);
1955  //int num_oper = count_operators(instr);
1956  //int num_oper2 = count_operators_cse(tree);
1957  //int num_oper2 = count_operators_cse_topdown(tree);
1958  //MesPrint("%d %d", num_oper, num_oper2);
1959 
1960  int num_oper = count_operators_cse_topdown(tree);
1961 
1962  // clean poly_vars, that is allocated by Horner_tree
1963  AN.poly_num_vars = 0;
1964  M_free(AN.poly_vars,"poly_vars");
1965 
1966  *pnum_oper = num_oper;
1967 
1968 }
1969 
1970 /*
1971  #] try_MCTS_scheme :
1972  #[ update_MCTS_scheme :
1973 */
1974 
1975 // update the best score and the search tree
1976 inline static void update_MCTS_scheme (int num_oper, const vector<WORD> &scheme, vector<tree_node *> *ppath) {
1977 
1978  vector<tree_node *> &path = *ppath;
1979 
1980  // update the (global) list of best Horner scheme
1981  if ((int)mcts_best_schemes.size() < AO.Optimize.mctsnumkeep ||
1982  (--mcts_best_schemes.end())->first > num_oper) {
1983  // here locking is necessary, for otherwise best_schemes may
1984  // become corrupted; lock can be prevented if each thread keeps
1985  // track of it's own list and those lists are merged in the end,
1986  // but this seems not useful to implement
1987  LOCK(optimize_lock);
1988  mcts_best_schemes.insert(make_pair(num_oper,scheme));
1989  if ((int)mcts_best_schemes.size() > AO.Optimize.mctsnumkeep)
1990  mcts_best_schemes.erase(--mcts_best_schemes.end());
1991  UNLOCK(optimize_lock);
1992  }
1993 
1994  // MCTS step IV: backpropagate
1995 
1996  // add number of operator and subtract mcts_expr_score, which
1997  // behaves as a "virtual loss"
1998  for (vector<tree_node *>::iterator p=path.begin(); p<path.end(); p++)
1999  (*p)->sum_results += num_oper - mcts_expr_score;
2000 
2001 }
2002 
2003 /*
2004  #] update_MCTS_scheme :
2005 */
2006 
2007 void find_Horner_MCTS_expand_tree () {
2008 
2009 #ifdef DEBUG
2010  MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS_expand_tree", thetime_str().c_str());
2011 #endif
2012 
2013  GETIDENTITY;
2014 
2015  // the order for the Horner scheme up to the selected node, with signs
2016  // indicating forward or backward.
2017  vector<WORD> order;
2018 
2019  // complete Horner scheme
2020  vector<WORD> scheme;
2021 
2022  // path to the selected node
2023  vector<tree_node *> path;
2024 
2025  // the number of operations obtained by the simulation
2026  int num_oper;
2027 
2028  next_MCTS_scheme(BHEAD &order, &scheme, &path);
2029  try_MCTS_scheme(BHEAD scheme, &num_oper);
2030 #ifdef DEBUG_MCTS
2031  // Actually "order" is needed only for this debug output.
2032  MesPrint ("{%a} -> {%a} -> %d", order.size(), &order[0], scheme.size(), &scheme[0], num_oper);
2033 #endif
2034  update_MCTS_scheme(num_oper, scheme, &path);
2035 
2036 #ifdef DEBUG
2037  MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS_expand_tree(%a-> %d)",
2038  thetime_str().c_str(), scheme.size(), &scheme[0], num_oper);
2039 #endif
2040 
2041 }
2042 
2043 /*
2044  #] find_Horner_MCTS_expand_tree :
2045  #[ PF_find_Horner_MCTS_expand_tree :
2046 */
2047 #ifdef WITHMPI
2048 
2049 // To remember which task is assigned to each slave. A task is represented by
2050 // a pair of a Horner scheme and the selected path for the scheme.
2051 // The index range is from 1 to PF.numtasks-1.
2052 vector<pair<vector<WORD>, vector<tree_node *> > > PF_opt_MCTS_tasks;
2053 
2054 // Initialization.
2055 void PF_find_Horner_MCTS_expand_tree_master_init () {
2056 
2057  PF_opt_MCTS_tasks.resize(PF.numtasks);
2058  for (int i = 1; i < PF.numtasks; i++) {
2059  pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[i];
2060  p.first.clear();
2061  p.second.clear();
2062  }
2063 
2064 }
2065 
2066 // Wait for an idle slave and return the process number.
2067 int PF_find_Horner_MCTS_expand_tree_master_next () {
2068 
2069  // Find an idle slave.
2070  int next;
2071  PF_Receive(PF_ANY_SOURCE, PF_OPT_MCTS_MSGTAG, &next, NULL);
2072 
2073  // Check if the slave had a task.
2074  pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2075  if (!p.first.empty()) {
2076  // If so, update the result.
2077  int num_oper;
2078  PF_Unpack(&num_oper, 1, PF_INT);
2079  update_MCTS_scheme(num_oper, p.first, &p.second);
2080 
2081  // Clear the task.
2082  p.first.clear();
2083  p.second.clear();
2084  }
2085 
2086  return next;
2087 
2088 }
2089 
2090 // The main function on the master.
2091 void PF_find_Horner_MCTS_expand_tree_master () {
2092 
2093 #ifdef DEBUG
2094  MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2095 #endif
2096 
2097  vector<WORD> order;
2098  vector<WORD> scheme;
2099  vector<tree_node *> path;
2100 
2101  next_MCTS_scheme(BHEAD &order, &scheme, &path);
2102 
2103  // Find an idle slave.
2104  int next = PF_find_Horner_MCTS_expand_tree_master_next();
2105 
2106  // Send a new task to the slave.
2108  int len = scheme.size();
2109  PF_LongSinglePack(&len, 1, PF_INT);
2110  PF_LongSinglePack(&scheme[0], len, PF_WORD);
2111  PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2112 
2113  // Remember the task.
2114  pair<vector<WORD>, vector<tree_node *> > &p = PF_opt_MCTS_tasks[next];
2115  p.first = scheme;
2116  p.second = path;
2117 
2118 #ifdef DEBUG
2119  MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master", thetime_str().c_str());
2120 #endif
2121 
2122 }
2123 
2124 // Wait for all the slaves to finish their tasks.
2125 void PF_find_Horner_MCTS_expand_tree_master_wait () {
2126 
2127 #ifdef DEBUG
2128  MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2129 #endif
2130 
2131  // Wait for all the slaves.
2132  for (int i = 1; i < PF.numtasks; i++) {
2133  int next = PF_find_Horner_MCTS_expand_tree_master_next();
2134  // Send a null task.
2136  int len = 0;
2137  PF_LongSinglePack(&len, 1, PF_INT);
2138  PF_LongSingleSend(next, PF_OPT_MCTS_MSGTAG);
2139  }
2140 
2141 #ifdef DEBUG
2142  MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_master_wait", thetime_str().c_str());
2143 #endif
2144 
2145 }
2146 
2147 // The main function on the slaves.
2148 void PF_find_Horner_MCTS_expand_tree_slave () {
2149 
2150 #ifdef DEBUG
2151  MesPrint ("*** [%s, w=%w] CALL: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2152 #endif
2153 
2154  vector<WORD> scheme;
2155 
2156  {
2157  // Send the first message to the master, which indicates I am idle.
2158  PF_PreparePack();
2159  int dummy = 0;
2160  PF_Pack(&dummy, 1, PF_INT);
2161  PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2162  }
2163 
2164  for (;;) {
2165  // Get a task from the master.
2166  PF_LongSingleReceive(MASTER, PF_OPT_MCTS_MSGTAG, NULL, NULL);
2167 
2168  // Length of the task.
2169  int len;
2170  PF_LongSingleUnpack(&len, 1, PF_INT);
2171 
2172  // No task remains.
2173  if (len == 0) break;
2174 
2175  // Perform the given task.
2176  scheme.resize(len);
2177  PF_LongSingleUnpack(&scheme[0], len, PF_WORD);
2178  int num_oper;
2179  try_MCTS_scheme(scheme, &num_oper);
2180 
2181  // Send the result to the master.
2182  PF_PreparePack();
2183  PF_Pack(&num_oper, 1, PF_INT);
2184  PF_Send(MASTER, PF_OPT_MCTS_MSGTAG);
2185  }
2186 
2187 #ifdef DEBUG
2188  MesPrint ("*** [%s, w=%w] DONE: PF_find_Horner_MCTS_expand_tree_slave", thetime_str().c_str());
2189 #endif
2190 
2191 }
2192 
2193 #endif
2194 /*
2195  #] PF_find_Horner_MCTS_expand_tree :
2196  #[ find_Horner_MCTS :
2197 */
2198 
2207 //vector<vector<WORD> > find_Horner_MCTS () {
2209 
2210 #ifdef WITHMPI
2211  if (PF.me != MASTER) {
2212  if (PF.numtasks <= 1) return;
2213  PF_find_Horner_MCTS_expand_tree_slave();
2214  return;
2215  }
2216 #endif
2217 
2218 #ifdef DEBUG
2219  MesPrint ("*** [%s, w=%w] CALL: find_Horner_MCTS", thetime_str().c_str());
2220 #endif
2221 
2222  GETIDENTITY;
2223 
2224  LONG start_time = TimeWallClock(1);
2225 
2226  // initialize the used global variables
2227  mcts_expr_score = count_operators(optimize_expr);
2228  mcts_root = tree_node();
2229 
2230  // extract all symbols from the expression
2231  set<WORD> var_set;
2232  for (WORD *t=optimize_expr; *t!=0; t+=*t)
2233  if (t[1] == SYMBOL)
2234  for (int i=3; i<t[2]; i+=2)
2235  var_set.insert(t[i]);
2236 
2237  // check for factorized/separated expression and make sure that
2238  // FACTORSYMBOL/SEPARATESYMBOL isn't included in the MCTS
2239  mcts_factorized = var_set.count(FACTORSYMBOL);
2240  if (mcts_factorized) var_set.erase(FACTORSYMBOL);
2241  mcts_separated = var_set.count(SEPARATESYMBOL);
2242  if (mcts_separated) var_set.erase(SEPARATESYMBOL);
2243 
2244  mcts_vars = vector<WORD>(var_set.begin(), var_set.end());
2245  optimize_num_vars = (int)mcts_vars.size();
2246  // initialize MCTS tree root
2247  for (int i=0; i<(int)mcts_vars.size(); i++) {
2248  if (AO.Optimize.hornerdirection != O_BACKWARD)
2249  mcts_root.childs.push_back(tree_node(+(mcts_vars[i]+1)));
2250  if (AO.Optimize.hornerdirection != O_FORWARD)
2251  mcts_root.childs.push_back(tree_node(-(mcts_vars[i]+1)));
2252  }
2253  my_random_shuffle(BHEAD mcts_root.childs.begin(), mcts_root.childs.end());
2254 
2255 #if defined(WITHMPI)
2256  PF_find_Horner_MCTS_expand_tree_master_init();
2257 #endif
2258 
2259  // initialize a potential variable mctsconstant scheme.
2260  AT.optimtimes = 0;
2261 
2262  // call expand_tree until it is called "mctsnumexpand" times, the
2263  // time limit is reached or the tree is fully finished
2264  for (int times=0; times<AO.Optimize.mctsnumexpand && !mcts_root.finished &&
2265  (AO.Optimize.mctstimelimit==0 || (TimeWallClock(1)-start_time)/100 < AO.Optimize.mctstimelimit);
2266  times++) {
2267  AT.optimtimes = times;
2268  // call expand_tree routine depending on threading mode
2269 #if defined(WITHPTHREADS)
2270  if (AM.totalnumberofthreads > 1)
2271  find_Horner_MCTS_expand_tree_threaded();
2272  else
2273 #elif defined(WITHMPI)
2274  if (PF.numtasks > 1)
2275  PF_find_Horner_MCTS_expand_tree_master();
2276  else
2277 #endif
2278  find_Horner_MCTS_expand_tree();
2279  }
2280 
2281  // if TForm or ParForm, wait for everyone to finish
2282 #ifdef WITHPTHREADS
2283  MasterWaitAll();
2284 #endif
2285 #ifdef WITHMPI
2286  PF_find_Horner_MCTS_expand_tree_master_wait();
2287 #endif
2288 #ifdef DEBUG
2289  MesPrint ("*** [%s, w=%w] DONE: find_Horner_MCTS", thetime_str().c_str());
2290 #endif
2291 
2292 }
2293 
2294 /*
2295  #] find_Horner_MCTS :
2296  #[ merge_operators :
2297 */
2298 
2356 vector<WORD> merge_operators (const vector<WORD> &all_instr, bool move_coeff) {
2357 
2358 #ifdef DEBUG_MORE
2359  MesPrint ("*** [%s, w=%w] CALL: merge_operators", thetime_str().c_str());
2360 #endif
2361 
2362  // get starting positions of instructions
2363  vector<const WORD *> instr;
2364  const WORD *tbegin = &*all_instr.begin();
2365  const WORD *tend = tbegin+all_instr.size();
2366  // copy all instructions to temp space. There will be n of them in instr.
2367  for (const WORD *t=tbegin; t!=tend; t+=*(t+2)) {
2368  instr.push_back(t);
2369  }
2370  int n = instr.size();
2371 
2372  // find parents and number of parents of instructions
2373  vector<int> par(n), numpar(n,0);
2374  for (int i=0; i<n; i++) par[i]=i;
2375 
2376  for (int i=0; i<n; i++) {
2377  for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) {
2378  if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) ) {
2379  // extra symbol t[3] is referred to in instr i.
2380  par[*(t+3)]=i;
2381  numpar[*(t+3)]++;
2382 
2383  // if coefficient isn't +/-1 or power > 1, increase numpar,
2384  // so that this is not merged
2385  if (*(t+*t-3)!=1 || *(t+*t-2)!=1 || ABS(*(t+*t-1))!=3) numpar[*(t+3)]++;
2386  if (*(t+4)>1) numpar[*(t+3)]++;
2387  }
2388  }
2389  }
2390  // determine which instructions to merge
2391  stack<int> s;
2392  s.push(n-1);
2393  vector<bool> vis(n,false);
2394 
2395  while (!s.empty()) {
2396 
2397  int i=s.top(); s.pop();
2398  if (vis[i]) continue;
2399  vis[i]=true;
2400 
2401  for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t)
2402  if ( *(t+1)==EXTRASYMBOL && *t!=1+ABS(*(t+*t-1)) )
2403  s.push(*(t+3));
2404 
2405  // condition: one parent and equal operator as parent
2406  if (numpar[i]==1 && *(instr[i]+1)==*(instr[par[i]]+1))
2407  par[i] = par[par[i]]; // The expr into which we subst par[i] to get i
2408  else
2409  par[i] = i;
2410  }
2411 
2412  // merge instructions into new instructions
2413  vector<WORD> newinstr;
2414 
2415  // stack of new expressions, all 0-indexed
2416  stack<WORD> new_expr;
2417  new_expr.push(n-1);
2418  vis = vector<bool>(n,false);
2419 
2420  // skip empty equations (might be introduced by greedy optimizations)
2421  vector<bool> skip(n,false), skipcoeff(n,false);
2422  for (int i=0; i<n; i++)
2423  if (*(instr[i]+OPTHEAD) == 0) skip[i]=skipcoeff[i]=true;
2424 
2425  // for renumbering merged parents
2426  vector<int> renum_par(n);
2427  for (int i=0; i<n; i++)
2428  renum_par[i]=i;
2429 
2430  while (!new_expr.empty()) {
2431  int x = new_expr.top(); new_expr.pop();
2432  if (vis[x]) continue;
2433  vis[x] = true;
2434 
2435  // find all instructions with parent=x and copy their arguments
2436  // into a new expression
2437  bool first_copy=true;
2438  int lenidx = newinstr.size()+2;
2439 
2440  // 1-indexed, since signs may occur
2441  stack<WORD> this_expr;
2442  this_expr.push(x+1);
2443 
2444  while (!this_expr.empty()) {
2445  // pop from stack, determine expr.nr and sign
2446  int i = this_expr.top(); this_expr.pop();
2447  int sign = SGN(i);
2448  i = ABS(i)-1;
2449  for (const WORD *t=instr[i]+OPTHEAD; *t!=0; t+=*t) { // terms in i
2450  // don't copy a term if:
2451  // (1) skip=true, since then it's merged into the parent
2452  // (2) extrasymbol with parent=x, because its children should be copied
2453  // (3) coefficient with skipcoeff=true, since it's already copied
2454  bool copy = !skip[i];
2455  if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
2456  if (par[*(t+3)] == x) {
2457  // parent of term refers to x. we push it with its sign if no skip is true
2458  // and the sign of the expr.
2459  this_expr.push(sign * (skip[i]||skipcoeff[i] ? 1 : SGN(*(t+*t-1))) * (1+*(t+3)));
2460  if (*(instr[i]+1) == OPER_MUL) sign=1;
2461  copy=false;
2462  }
2463  else {
2464  new_expr.push(*(t+3));
2465  }
2466  }
2467 
2468  if (*t == 1+ABS(*(t+*t-1)) && skipcoeff[i]) {
2469  copy=false;
2470  }
2471 
2472  if (copy) {
2473  // first term, so add header
2474  if (first_copy) {
2475  newinstr.push_back(renum_par[x]); // expr.nr.
2476  newinstr.push_back(*(instr[x]+1)); // operator
2477  newinstr.push_back(3); // length OPTHEAD?
2478  first_copy=false;
2479  }
2480 
2481  // copy term and adjust sign
2482  int thislenidx = newinstr.size();
2483  newinstr.insert(newinstr.end(), t, t+*t); // Put the whole term in newinstr
2484  newinstr.back() *= sign;
2485  if (*(instr[i]+1) == OPER_MUL) sign=1;
2486  newinstr[lenidx] += *t;
2487 
2488  // check for moving coefficients up
2489  // necessary condition: MUL-expression with 1 parent
2490  if (move_coeff && *t!=1+ABS(*(t+*t-1)) && *(instr[i]+1)!=OPER_COMMA &&
2491  *(t+1)==EXTRASYMBOL && numpar[*(t+3)]==1 && *(instr[*(t+3)]+1)==OPER_MUL) {
2492 
2493  // coefficient is always the first term (that's how Horner+generate works)
2494  const WORD *t1 = instr[*(t+3)]+OPTHEAD;
2495  const WORD *t2 = t1+*t1;
2496 
2497  if (*t1 == 1+ABS(*(t1+*t1-1))) {
2498  // t1 pointer to a coefficient, so move it
2499 
2500  // remove old coefficient of 1
2501  WORD *t3 = &*newinstr.end(); //
2502  int sign2 = SGN(t3[-1]); //
2503  newinstr.erase(newinstr.end()-3, newinstr.end());
2504  // count number of arguments; iff it is 2 move the (extra)symbol too
2505  int numargs=0;
2506  for (const WORD *tt=t1; *tt!=0; tt+=*tt) {
2507  numargs++;
2508  }
2509  if (numargs==2 && *(t2+4)==1) {
2510  // replace (extra)symbol
2511  newinstr[newinstr.size()-4] = *(t2+1);
2512  newinstr[newinstr.size()-3] = *(t2+2);
2513  newinstr[newinstr.size()-2] = *(t2+3);
2514  newinstr[newinstr.size()-1] = *(t2+4);
2515  sign2 *= SGN(*(t2+*t2-1)); // was t2[7]
2516 
2517  // ignore this expression from now on
2518  skip[*(t+3)]=true;
2519  if (*(t2+1)==EXTRASYMBOL)
2520  renum_par[*(t+3)] = *(t2+3);
2521  }
2522  else {
2523  // otherwise, ignore coefficient from now on
2524  // we need to collect the signs of the terms
2525  // first and set them to one. This was forgotten
2526  // before. Gave occasional errors.
2527  if ( numargs > 2 || ( numargs == 2 && t2[4] > 1 ) ) {
2528  for (WORD *tt=(WORD *)t2; *tt!=0; tt+=*tt) {
2529  if ( tt[*tt-1] < 0 ) {
2530  tt[*tt-1] = -tt[*tt-1];
2531  sign2 = -sign2;
2532  }
2533  }
2534  }
2535  skipcoeff[*(t+3)]=true;
2536  }
2537 
2538  // add new coefficient
2539  newinstr.insert(newinstr.end(), t1+1, t1+*t1);
2540  newinstr.back() *= sign2;
2541  newinstr[thislenidx] += ABS(newinstr.back()) - 3;
2542  newinstr[lenidx] += ABS(newinstr.back()) - 3;
2543  }
2544  }
2545  }
2546  }
2547  }
2548 
2549  // if something has been copied, add trailing zero
2550  if (!first_copy) {
2551  newinstr.push_back(0);
2552  newinstr[lenidx]++;
2553  }
2554  }
2555 
2556  // renumber the expressions to 0,1,2,..,; only keep expressions with
2557  // skip=false which are their own parent after a renumbering in case
2558  // of moved coefficients
2559 
2560  // find renumber scheme
2561  vector<int> renum(n,-1);
2562  int next=0;
2563  for (int i=0; i<n; i++)
2564  if (!skip[i] && renum_par[par[i]]==i) renum[renum_par[i]]=next++;
2565 
2566  // find new instruction index
2567  tbegin = &*newinstr.begin();
2568  tend = tbegin+newinstr.size();
2569  for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
2570  instr[renum[*t]] = t;
2571 
2572  // renumbering expressions and sort them lexicographically (in
2573  // new_instr they are in the preorder of the expression tree)
2574  vector<WORD> sortinstr;
2575  for (int i=0; i<next; i++) {
2576  int idx = sortinstr.size();
2577  sortinstr.insert(sortinstr.end(), instr[i], instr[i]+*(instr[i]+2));
2578 
2579  sortinstr[idx] = renum[sortinstr[idx]];
2580 
2581  // renumber content of an expression
2582  for (WORD *t2=&sortinstr[idx]+3; *t2!=0; t2+=*t2)
2583  if (*t2!=1+ABS(*(t2+*t2-1)) && *(t2+1)==EXTRASYMBOL)
2584  *(t2+3) = renum[*(t2+3)];
2585  }
2586 
2587 #ifdef DEBUG_MORE
2588  MesPrint ("*** [%s, w=%w] DONE: merge_operators", thetime_str().c_str());
2589 #endif
2590 
2591  return sortinstr;
2592 }
2593 
2594 /*
2595  #] merge_operators :
2596  #[ class Optimization :
2597 */
2598 
2624 public:
2625  int type, arg1, arg2, improve;
2626  vector<WORD> coeff;
2627  vector<int> eqnidxs;
2628 
2629  bool operator< (const optimization &a) const {
2630  if (arg1 != a.arg1) return arg1 < a.arg1;
2631  if (arg2 != a.arg2) return arg2 < a.arg2;
2632  if (type != a.type) return type < a.type;
2633  return coeff < a.coeff;
2634  }
2635 };
2636 
2637 /*
2638  #] class Optimization :
2639  #[ find_optimizations :
2640 */
2641 
2651 vector<optimization> find_optimizations (const vector<WORD> &instr) {
2652 
2653 #ifdef DEBUG_GREEDY
2654  MesPrint ("*** [%s, w=%w] CALL: find_optimizations", thetime_str().c_str());
2655 #endif
2656 
2657 // #[ Startup :
2658  // the resulting vector of optimizations
2659  vector<optimization> res;
2660 
2661  // a map to count the improvement of an optimization; the
2662  // improvement is stored as a vector<int> with equation numbers
2663  map<optimization, vector<int> > cnt;
2664 
2665  // a map to identify coefficients
2666  map<vector<int>,int> idx_coeff;
2667 
2668  const WORD *ebegin = &*instr.begin();
2669  const WORD *eend = ebegin+instr.size();
2670 
2671  for (int optim_type=0; optim_type<=4; optim_type++) {
2672 
2673  cnt.clear();
2674  idx_coeff.clear();
2675 
2676  optimization optim;
2677  optim.type = optim_type;
2678 // #] Startup :
2679 
2680 // #[ type 0 : find optimizations of the form z=x^n (optim.type==0)
2681  if (optim_type == 0) {
2682  for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2683  if (*(e+1) != OPER_MUL) continue;
2684  for (const WORD *t=e+3; *t!=0; t+=*t) {
2685  if (*t == ABS(*(t+*t-1))+1) continue;
2686  if (*(t+4) > 1) {
2687  optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2688  optim.arg2 = *(t+4);
2689  cnt[optim].push_back(e-ebegin);
2690  }
2691  }
2692  }
2693  }
2694 // #] type 0 :
2695 // #[ type 1 : find optimizations of the form z=x*y (optim.type==1)
2696  if (optim_type == 1) {
2697  for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2698  if (*(e+1) != OPER_MUL) continue;
2699 
2700  for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2701  if (*t1 == ABS(*(t1+*t1-1))+1) continue;
2702  int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2703 
2704  for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2705  if (*t2 == ABS(*(t2+*t2-1))+1) continue;
2706  int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2707 
2708  if (*(t1+4) == *(t2+4)) {
2709  optim.arg1 = x1;
2710  optim.arg2 = x2;
2711  if (optim.arg1 > optim.arg2)
2712  swap(optim.arg1, optim.arg2);
2713 
2714  if (*(t1+4) == 1)
2715  cnt[optim].push_back(e-ebegin);
2716  else {
2717  // E=x^n*y^n -> z=x*y; E=z^n is double improvement
2718  cnt[optim].push_back(e-ebegin);
2719  cnt[optim].push_back(e-ebegin);
2720  }
2721  }
2722  }
2723  }
2724  }
2725  }
2726 // #] type 1 :
2727 // #[ type 2 : find optimizations of the form z=c*x (optim.type==2)
2728  if (optim_type == 2) {
2729  for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2730 
2731  if (*(e+1) == OPER_ADD) {
2732  // in ADD-equation
2733 
2734  for (const WORD *t=e+3; *t!=0; t+=*t) {
2735  if (*t == ABS(*(t+*t-1))+1) continue;
2736 
2737  if (*(t+4)==1) {
2738  if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
2739 
2740  optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2741  optim.coeff.back() = ABS(optim.coeff.back());
2742 
2743  optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2744  optim.arg2 = 0;
2745 
2746  cnt[optim].push_back(e-ebegin);
2747  }
2748  }
2749  }
2750  else if (*(e+1) == OPER_MUL) {
2751  // in MUL-equation
2752  optim.coeff.clear();
2753 
2754  for (const WORD *t=e+3; *t!=0; t+=*t)
2755  if (*t == ABS(*(t+*t-1))+1) {
2756  if (ABS(*(t+*t-1))==3 && *(t+*t-2)==1 && *(t+*t-3)==1) continue;
2757  optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2758  optim.coeff.back() = ABS(optim.coeff.back());
2759  }
2760 
2761  if (!optim.coeff.empty())
2762  for (const WORD *t=e+3; *t!=0; t+=*t) {
2763  if (*t == ABS(*(t+*t-1))+1) continue;
2764  if (*(t+4) != 1) continue;
2765  optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2766  optim.arg2 = 0;
2767  cnt[optim].push_back(e-ebegin);
2768  }
2769  }
2770  }
2771  }
2772 // #] type 2 :
2773 // #[ type 3 : find optimizations of the form z=x+c (optim.type==3)
2774  if (optim_type == 3) {
2775  for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2776 
2777  if (*(e+1) != OPER_ADD) continue;
2778 
2779  optim.coeff.clear();
2780 
2781  for (const WORD *t=e+3; *t!=0; t+=*t)
2782  if (*t == ABS(*(t+*t-1))+1) {
2783  optim.coeff = vector<WORD>(t+*t-ABS(*(t+*t-1)), t+*t);
2784  }
2785 
2786  if (!optim.coeff.empty()) {
2787  for (const WORD *t=e+3; *t!=0; t+=*t) {
2788  if (*t == ABS(*(t+*t-1))+1) continue;
2789  if (ABS(*(t+*t-1))!=3 || *(t+*t-2)!=1 || *(t+*t-3)!=1) continue;
2790  if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2791 
2792  optim.arg1 = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3) + 1);
2793  optim.arg2 = 0;
2794  cnt[optim].push_back(e-ebegin);
2795  if (*(t+*t-1)==-3) optim.coeff.back() *= -1;
2796  }
2797  }
2798  }
2799  }
2800 // #] type 3 :
2801 // #[ type 4,5 : find optimizations of the form z=x+y or z=x-y (optim.type==4 or 5)
2802  if (optim_type == 4) {
2803  for (const WORD *e=ebegin; e!=eend; e+=*(e+2)) {
2804  if (*(e+1) != OPER_ADD) continue;
2805 
2806  for (const WORD *t1=e+3; *t1!=0; t1+=*t1) {
2807  if (*t1 == ABS(*(t1+*t1-1))+1) continue;
2808  int x1 = (*(t1+1)==SYMBOL ? 1 : -1) * (*(t1+3) + 1);
2809 
2810  for (const WORD *t2=t1+*t1; *t2!=0; t2+=*t2) {
2811  if (*t2 == ABS(*(t2+*t2-1))+1) continue;
2812  int x2 = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3) + 1);
2813 
2814  int sign1 = SGN(*(t1+*t1-1));
2815  int sign2 = SGN(*(t2+*t2-1));
2816 
2817  if (BigLong((UWORD *)t1+5, ABS(*(t1+*t1-1))-1, (UWORD *)t2+5, ABS(*(t2+*t2-1))-1) == 0) {
2818 
2819  optim.type = (sign1 * sign2 == 1 ? 4 : 5); // optimization type
2820  optim.arg1 = x1;
2821  optim.arg2 = x2;
2822  if (optim.arg1 > optim.arg2) {
2823  swap(optim.arg1, optim.arg2);
2824  }
2825 
2826  if (ABS(*(t1+*t1-1))==3 && *(t1+*t1-2)==1 && *(t1+*t1-3)==1)
2827  cnt[optim].push_back(e-ebegin);
2828  else {
2829  // E=2x+2y -> z=x+y; E=2z is improvement bby itself
2830  cnt[optim].push_back(e-ebegin);
2831  cnt[optim].push_back(e-ebegin);
2832  }
2833  }
2834  }
2835  }
2836  }
2837  }
2838 // #] type 4,5 :
2839 // #[ add :
2840 
2841  // add optimizations with positive improvement to the result
2842  for (map<optimization, vector<int> >::iterator i=cnt.begin(); i!=cnt.end(); i++) {
2843  int improve = i->second.size() - 1;
2844  if (improve > 0) {
2845  res.push_back(i->first);
2846  res.back().improve = improve;
2847  res.back().eqnidxs = i->second;
2848 
2849  // remove duplicates, that were add to get the correct improvement
2850  res.back().eqnidxs.erase(unique(res.back().eqnidxs.begin(), res.back().eqnidxs.end()), res.back().eqnidxs.end());
2851  }
2852  }
2853  }
2854 // #] add :
2855 
2856 #ifdef DEBUG_GREEDY
2857  MesPrint ("*** [%s, w=%w] DONE: find_optimizations",thetime_str().c_str());
2858 #endif
2859 
2860  return res;
2861 }
2862 
2863 /*
2864  #] find_optimizations :
2865  #[ do_optimization :
2866 */
2867 
2884 bool do_optimization (const optimization optim, vector<WORD> &instr, int newid) {
2885 
2886 // #[ Debug code :
2887 #ifdef DEBUG_GREEDY
2888  if (optim.type==0)
2889  MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d^%d)",
2890  thetime_str().c_str(), optim.improve,
2891  optim.arg1>0?'x':'Z', ABS(optim.arg1)-1, optim.arg2);
2892  else if (optim.type==1 || optim.type>=4)
2893  MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%c%d)",
2894  thetime_str().c_str(), optim.improve,
2895  optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2896  optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
2897  optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
2898  else {
2899  WORD n = optim.coeff.back()/2;
2900  UBYTE num[BITSINWORD*ABS(n)], den[BITSINWORD*ABS(n)];
2901  PrtLong((UWORD *)&optim.coeff[0], n, num);
2902  PrtLong((UWORD *)&optim.coeff[ABS(n)], ABS(n), den);
2903  MesPrint ("*** [%s, w=%w] CALL: do_optimization(improve=%d, %c%d%c%s/%s)",
2904  thetime_str().c_str(), optim.improve,
2905  optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
2906  optim.type==2 ? '*' : '+', num,den);
2907  }
2908 #endif
2909 // #] Debug code :
2910 
2911  bool substituted = false;
2912  WORD *ebegin = &*instr.begin();
2913 
2914 // #[ type 0 : substitution of the form z=x^n (optim.type==0)
2915  if (optim.type == 0) {
2916 
2917  int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2918  int varnumx = ABS(optim.arg1) - 1;
2919  int n = optim.arg2;
2920 
2921  for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
2922  WORD *e = ebegin + optim.eqnidxs[i];
2923  if (*(e+1) != OPER_MUL) continue;
2924 
2925  // scan through equation
2926  for (WORD *t=e+3; *t!=0; t+=*t) {
2927  if (*t == ABS(*(t+*t-1))+1) continue;
2928  if (*(t+1)==vartypex &&
2929  *(t+3)==varnumx &&
2930  *(t+4) % n == 0) {
2931 
2932  // substitute
2933  *(t+1) = EXTRASYMBOL;
2934  *(t+3) = newid;
2935  *(t+4) /= n;
2936 
2937  substituted = true;
2938  }
2939  }
2940  }
2941 
2942  if (!substituted) {
2943 #ifdef DEBUG_GREEDY
2944  MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
2945 #endif
2946  return false;
2947  }
2948 
2949  // add extra equation (Tnew = x^n)
2950  instr.push_back(newid); // eqn.nr
2951  instr.push_back(OPER_MUL); // operator
2952  instr.push_back(12); // total length
2953  instr.push_back(8); // term length
2954  instr.push_back(vartypex); // (extra)symbol
2955  instr.push_back(4); // symbol length
2956  instr.push_back(varnumx); // symbol id
2957  instr.push_back(n); // power
2958  instr.push_back(1);
2959  instr.push_back(1); // coeffient 1
2960  instr.push_back(3);
2961  instr.push_back(0); // trailing 0
2962  }
2963 // #] type 0 :
2964 // #[ type 1 : substitution of the form z=x*y (optim.type==1)
2965  if (optim.type == 1) {
2966 
2967  int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
2968  int varnumx = ABS(optim.arg1) - 1;
2969  int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
2970  int varnumy = ABS(optim.arg2) - 1;
2971 
2972  for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
2973  WORD *e = ebegin + optim.eqnidxs[i];
2974  if (*(e+1) != OPER_MUL) continue;
2975 
2976  // scan through equation
2977  int powx=0, powy=0;
2978  for (WORD *t=e+3; *t!=0; t+=*t) {
2979  if (*t == ABS(*(t+*t-1))+1) continue;
2980  if (*(t+1)==vartypex && *(t+3)==varnumx) powx = *(t+4);
2981  if (*(t+1)==vartypey && *(t+3)==varnumy) powy = *(t+4);
2982  }
2983 
2984  // substitute if found
2985  if (powx>0 && powy>0 && powx==powy) {
2986 
2987  WORD sign = 1;
2988  WORD *newt = e+3;
2989 
2990  for (WORD *t=e+3; *t!=0;) {
2991  int dt=*t;
2992 
2993  if (*t == ABS(*(t+*t-1))+1 ||
2994  (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
2995  !(*(t+1)==vartypey && *(t+3)==varnumy))) {
2996  memmove(newt, t, *t*sizeof(WORD));
2997  newt += dt;
2998  }
2999  else {
3000  sign *= SGN(*(t+*t-1));
3001  }
3002 
3003  t+=dt;
3004  }
3005 
3006  *newt++ = 8; // term length
3007  *newt++ = EXTRASYMBOL; // extrasymbol
3008  *newt++ = 4; // symbol length
3009  *newt++ = newid; // symbol id
3010  *newt++ = powx; // power
3011  *newt++ = 1;
3012  *newt++ = 1; // coefficient +/-1
3013  *newt++ = 3*sign;
3014  *newt++ = 0; // trailing 0
3015 
3016  substituted = true;
3017  }
3018  }
3019 
3020  if (!substituted) {
3021 #ifdef DEBUG_GREEDY
3022  MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3023 #endif
3024  return false;
3025  }
3026 
3027  // add extra equation (Tnew = x*y)
3028  instr.push_back(newid); // eqn.nr
3029  instr.push_back(OPER_MUL); // operator
3030  instr.push_back(20); // total length
3031  instr.push_back(8); // LHS length
3032  instr.push_back(vartypex); // (extra)symbol
3033  instr.push_back(4); // symbol length
3034  instr.push_back(varnumx); // symbol id
3035  instr.push_back(1); // power 1
3036  instr.push_back(1);
3037  instr.push_back(1); // coefficient 1
3038  instr.push_back(3);
3039  instr.push_back(8); // RHS length
3040  instr.push_back(vartypey); // (extra)symbol
3041  instr.push_back(4); // symbol length
3042  instr.push_back(varnumy); // symbol id
3043  instr.push_back(1); // power 1
3044  instr.push_back(1);
3045  instr.push_back(1); // coefficient 1
3046  instr.push_back(3);
3047  instr.push_back(0); // trailing 0
3048  }
3049 // #] type 1 :
3050 // #[ type 2 : substitution of the form z=c*x (optim.type==2)
3051 
3052  if (optim.type == 2) {
3053  int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3054  int varnum = ABS(optim.arg1) - 1;
3055 
3056  WORD ncoeff = optim.coeff.back();
3057 
3058  // scan through equations
3059  for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3060  WORD *e = ebegin + optim.eqnidxs[i];
3061 
3062  if (*(e+1) == OPER_ADD) {
3063  // scan through ADD-equation
3064  for (WORD *t=e+3; *t!=0; t+=*t) {
3065 
3066  if (*t == ABS(*(t+*t-1))+1) continue;
3067 
3068  if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1 &&
3069  BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3070  (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3071  // substitute
3072 
3073  int sign = SGN(*(t+*t-1));
3074 
3075  WORD *tend = t;
3076  while (*tend!=0) tend+=*tend;
3077  WORD nmove = tend - t - *t;
3078  memmove(t, t+*t, nmove*sizeof(WORD));
3079  t += nmove;
3080 
3081  *t++ = 8; // term length
3082  *t++ = EXTRASYMBOL; // (extra)symbol
3083  *t++ = 4; // symbol length
3084  *t++ = newid; // symbol id
3085  *t++ = 1; // power of 1
3086  *t++ = 1;
3087  *t++ = 1; // coefficient of +/-1
3088  *t++ = 3 * sign;
3089  *t++ = 0; // trailing 0
3090 
3091  substituted = true;
3092  break;
3093  }
3094  }
3095  }
3096  else if (*(e+1) == OPER_MUL) {
3097 
3098  bool coeff_match=false, var_match=false;
3099  int sign = 1;
3100 
3101  // scan through MUL-equation
3102  for (WORD *t=e+3; *t!=0; t+=*t) {
3103  if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ncoeff-1,
3104  (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0) {
3105  coeff_match = true;
3106  sign *= SGN(*(t+*t-1));
3107  }
3108  else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1) {
3109  var_match = true;
3110  sign *= SGN(*(t+*t-1));
3111  }
3112  }
3113 
3114  // substitute if found
3115  if (coeff_match && var_match) {
3116 
3117  WORD *newt = e+3;
3118 
3119  for (WORD *t=e+3; *t!=0;) {
3120 
3121  int dt=*t;
3122 
3123  if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3124  memmove(newt, t, dt*sizeof(WORD));
3125  newt += dt;
3126  }
3127 
3128  t+=dt;
3129  }
3130 
3131  *newt++ = 8; // term length
3132  *newt++ = EXTRASYMBOL; // extrasymbol
3133  *newt++ = 4; // symbol length
3134  *newt++ = newid; // symbol id
3135  *newt++ = 1; // power of 1
3136  *newt++ = 1;
3137  *newt++ = 1; // coefficient of +/-1
3138  *newt++ = 3 * sign;
3139  *newt++ = 0; // trailing 0
3140 
3141  substituted = true;
3142  }
3143  }
3144  }
3145 
3146  if (!substituted) {
3147 #ifdef DEBUG_GREEDY
3148  MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3149 #endif
3150  return false;
3151  }
3152 
3153  // add extra equation (Tnew = c*y)
3154  instr.push_back(newid); // eqn.nr
3155  instr.push_back(OPER_ADD); // operator
3156  instr.push_back(9+ABS(ncoeff)); // total length
3157  instr.push_back(5+ABS(ncoeff)); // term length
3158  instr.push_back(vartype); // (extra)symbol
3159  instr.push_back(4); // symbol length
3160  instr.push_back(varnum); // symbol id
3161  instr.push_back(1); // power of 1
3162  for (int i=0; i<ABS(ncoeff); i++) // coefficient
3163  instr.push_back(optim.coeff[i]);
3164  instr.push_back(0); // trailing 0
3165  }
3166 // #] type 2 :
3167 // #[ type 3 : substitution of the form z=x+c (optim.type==3)
3168  if (optim.type == 3) {
3169  int vartype = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3170  int varnum = ABS(optim.arg1) - 1;
3171 
3172  WORD ncoeff = optim.coeff.back();
3173 
3174  // scan through equation
3175  for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3176  WORD *e = ebegin + optim.eqnidxs[i];
3177 
3178  if (*(e+1) != OPER_ADD) continue;
3179 
3180  int coeff_match=0, var_match=0;
3181 
3182  for (WORD *t=e+3; *t!=0; t+=*t) {
3183  if (*t == ABS(*(t+*t-1))+1 && BigLong((UWORD *)&optim.coeff[0],ABS(ncoeff)-1,
3184  (UWORD *)t+*t-ABS(*(t+*t-1)),ABS(*(t+*t-1))-1) == 0)
3185  coeff_match = SGN(ncoeff) * SGN(*(t+*t-1));
3186  else if (*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)
3187  var_match = SGN(*(t+7));
3188  }
3189 
3190  // substitute if found (x+c and -x-c and matches)
3191  if (coeff_match * var_match == 1) {
3192 
3193  WORD *newt = e+3;
3194 
3195  for (WORD *t=e+3; *t!=0;) {
3196  int dt=*t;
3197  if (*t!=ABS(*(t+*t-1))+1 && !(*(t+1)==vartype && ABS(*(t+3))==varnum && *(t+4)==1)) {
3198  memmove(newt, t, dt*sizeof(WORD));
3199  newt += dt;
3200  }
3201  t+=dt;
3202  }
3203 
3204  *newt++ = 8; // term length
3205  *newt++ = EXTRASYMBOL; // extrasymbol
3206  *newt++ = 4; // symbol length
3207  *newt++ = newid; // symbol id
3208  *newt++ = 1; // power of 1
3209  *newt++ = 1;
3210  *newt++ = 1; // coefficient of +/-1
3211  *newt++ = 3*coeff_match;
3212  *newt++ = 0; // trailing zero
3213 
3214  substituted = true;
3215  }
3216  }
3217 
3218  if (!substituted) {
3219 #ifdef DEBUG_GREEDY
3220  MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3221 #endif
3222  return false;
3223  }
3224 
3225  // add extra equation (Tnew = x+c)
3226  instr.push_back(newid); // eqn.nr
3227  instr.push_back(OPER_ADD); // operator
3228  instr.push_back(13+ABS(ncoeff)); // total length
3229  instr.push_back(8); // x-term length
3230  instr.push_back(vartype); // (extra)symbol
3231  instr.push_back(4); // symbol length
3232  instr.push_back(varnum); // symbol id
3233  instr.push_back(1); // power of 1
3234  instr.push_back(1);
3235  instr.push_back(1); // coefficient of 1
3236  instr.push_back(3);
3237  instr.push_back(ABS(ncoeff)+1); // c-term length
3238  for (int i=0; i<ABS(ncoeff); i++) // coefficient
3239  instr.push_back(optim.coeff[i]);
3240  instr.push_back(0); // trailing zero
3241  }
3242 // #] type 3 :
3243 // #[ type 4,5 : substitution of the form z=x+y or z=x-y (optim.type=4 or 5)
3244  if (optim.type >= 4) {
3245 
3246  int vartypex = optim.arg1>0 ? SYMBOL : EXTRASYMBOL;
3247  int varnumx = ABS(optim.arg1) - 1;
3248  int vartypey = optim.arg2>0 ? SYMBOL : EXTRASYMBOL;
3249  int varnumy = ABS(optim.arg2) - 1;
3250 
3251  // scan through equations
3252  for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3253  WORD *e = ebegin + optim.eqnidxs[i];
3254 
3255  if (*(e+1) != OPER_ADD) continue;
3256 
3257  const WORD *coeffx=NULL, *coeffy=NULL;
3258  WORD ncoeffx=0,ncoeffy=0;
3259 
3260  // looks for terms
3261  for (WORD *t=e+3; *t!=0; t+=*t) {
3262  if (*t == ABS(*(t+*t-1))+1) continue; // constant
3263  if (*(t+1)==vartypex && *(t+3)==varnumx && *(t+4)==1) {
3264  coeffx = t+5;
3265  ncoeffx = *(t+*t-1);
3266  }
3267  if (*(t+1)==vartypey && *(t+3)==varnumy && *(t+4)==1) {
3268  coeffy = t+5;
3269  ncoeffy = *(t+*t-1);
3270  }
3271  }
3272 
3273  // check signs (type=4: x+y and -x-y, type=5: x-y and -x+y) ??????
3274  // check signs (type=4: x+y, type=5: x-y) !!!!!!!!!!
3275  if (SGN(ncoeffx) * SGN(ncoeffy) * (optim.type==4 ? 1 : -1) == 1) {
3276  // check absolute value of coeeficients
3277  if (BigLong((UWORD *)coeffx, ABS(ncoeffx)-1, (UWORD *)coeffy, ABS(ncoeffy)-1) == 0) {
3278  // substitute
3279  vector<WORD> coeff(coeffx, coeffx+ABS(ncoeffx));
3280 
3281  WORD *newt = e+3;
3282 /*
3283 if ( optim.type == 5 ) {
3284  while ( *newt ) newt+=*newt;
3285  int i = (newt - e) - 3;
3286  MesPrint(" < %a",i,e+3);
3287  newt = e+3;
3288 }
3289 */
3290  for (WORD *t=e+3; *t!=0;) {
3291  int dt=*t;
3292  if (*t == ABS(*(t+*t-1))+1 ||
3293  (!(*(t+1)==vartypex && *(t+3)==varnumx) &&
3294  !(*(t+1)==vartypey && *(t+3)==varnumy))) {
3295  memmove(newt, t, dt*sizeof(WORD));
3296  newt += dt;
3297  }
3298  t+=dt;
3299  }
3300 
3301  *newt++ = 5 + ABS(ncoeffx); // term length
3302  *newt++ = EXTRASYMBOL; // extrasymbol
3303  *newt++ = 4; // symbol length
3304  *newt++ = newid; // symbol id
3305  *newt++ = 1; // power of 1
3306  for (int j=0; j<ABS(ncoeffx); j++)// coefficient
3307  *newt++ = coeff[j];
3308  *newt++ = 0; // trailing 0
3309  substituted = true;
3310 /*
3311 if ( optim.type == 5 ) {
3312  int i = (newt - e) - 4;
3313  MesPrint(" > %a",i,e+3);
3314 }
3315 */
3316  }
3317  }
3318  }
3319 
3320  if (!substituted) {
3321 #ifdef DEBUG_GREEDY
3322  MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=false", thetime_str().c_str(), optim.improve);
3323 #endif
3324  return false;
3325  }
3326 /*
3327 if ( optim.type == 5 )
3328 MesPrint ("improve=%d, %c%d%c%c%d)", optim.improve,
3329  optim.arg1>0?'x':'Z', ABS(optim.arg1)-1,
3330  optim.type==1 ? '*' : optim.type==4 ? '+' : '-',
3331  optim.arg2>0?'x':'Z', ABS(optim.arg2)-1);
3332 */
3333  // add extra equation (Tnew = x+/-y)
3334  instr.push_back(newid); // eqn.nr
3335  instr.push_back(OPER_ADD); // operator
3336  instr.push_back(20); // total length
3337  instr.push_back(8); // term length
3338  instr.push_back(vartypex); // (extra)symbol
3339  instr.push_back(4); // symbol length
3340  instr.push_back(varnumx); // symbol id
3341  instr.push_back(1); // power of 1
3342  instr.push_back(1);
3343  instr.push_back(1); // coefficient of 1
3344  instr.push_back(3);
3345  instr.push_back(8); // term length
3346  instr.push_back(vartypey); // (extra)symbol
3347  instr.push_back(4); // symbol length
3348  instr.push_back(varnumy); // symbol id
3349  instr.push_back(1); // power of 1
3350  instr.push_back(1);
3351  instr.push_back(1); // coefficient of +/-1
3352  instr.push_back(3*(optim.type==4?1:-1));
3353  instr.push_back(0); // trailing 0
3354  }
3355 // #] type 4,5 :
3356 // #[ trivial : remove trivial equations of the form Zi = +/-Zj
3357  vector<int> renum(newid+1, 0);
3358  bool do_renum=false;
3359 
3360  // vector may be moved when it is extended
3361  ebegin = &*instr.begin();
3362 
3363  for (int i=0; i<(int)optim.eqnidxs.size(); i++) {
3364  WORD *e = ebegin + optim.eqnidxs[i];
3365  WORD *t = e+3;
3366  if (*t==0) continue; // empty (removed) equation
3367  if (*(t+*t)!=0) continue; // more than 1 term
3368  if (*t!=8) continue; // term not of correct form
3369  if (*(t+4)!=1) continue; // power != 1
3370  if (*(t+5)!=1 || *(t+6)!=1) continue; // coeff != +/-1
3371 
3372  // trivial term, so renumber this one
3373  renum[*e] = SGN(*(t+7)) * (*(t+3) + 1);
3374  do_renum = true;
3375 
3376  // remove equation
3377  *t=0;
3378  }
3379 // #] trivial :
3380 // #[ renumbering :
3381 
3382  // there are renumberings to be done, so loop through all equations
3383  if (do_renum) {
3384  WORD *eend = ebegin+instr.size();
3385 
3386  for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3387  for (WORD *t=e+3; *t!=0; t+=*t) {
3388  if (*t == ABS(*(t+*t-1))+1) continue;
3389  if (*(t+1)==EXTRASYMBOL && renum[*(t+3)]!=0) {
3390  *(t+*t-1) *= SGN(renum[*(t+3)]);
3391  *(t+3) = ABS(renum[*(t+3)]) - 1;
3392  }
3393  }
3394  }
3395  }
3396 // #] renumbering :
3397 
3398 #ifdef DEBUG_GREEDY
3399  MesPrint ("*** [%s, w=%w] DONE: do_optimization : res=true", thetime_str().c_str(), optim.improve);
3400 #endif
3401 
3402  return true;
3403 }
3404 
3405 /*
3406  #] do_optimization :
3407  #[ partial_factorize :
3408 */
3409 
3433 int partial_factorize (vector<WORD> &instr, int n, int improve) {
3434 
3435 #ifdef DEBUG_GREEDY
3436  MesPrint ("*** [%s, w=%w] CALL: partial_factorize (n=%d)", thetime_str().c_str(), n);
3437 #endif
3438 
3439  GETIDENTITY;
3440 
3441  // get starting positions of instructions
3442  vector<int> instr_idx(n);
3443  WORD *ebegin = &*instr.begin();
3444  WORD *eend = ebegin+instr.size();
3445  for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3446  instr_idx[*e] = e - ebegin;
3447  }
3448 
3449  // get reference counts
3450 /*
3451  * The next construction replaces the vector construction which is
3452  * rather costly for valgrind (and maybe also in normal running)
3453  */
3454  int nmax = 2*n;
3455  WORD *numpar = (WORD *)Malloc1(nmax*sizeof(WORD),"numpar");
3456  for ( int i = 0; i < nmax; i++ ) numpar[i] = 0;
3457 // vector<WORD> numpar(n);
3458  for (WORD *e=ebegin; e!=eend; e+=*(e+2))
3459  for (WORD *t=e+3; *t!=0; t+=*t) {
3460  if (*t == ABS(*(t+*t-1))+1) continue;
3461  if (*(t+1) == EXTRASYMBOL) numpar[*(t+3)]++;
3462  }
3463 
3464  // find factorizable expressions
3465  for (int i=0; i<n; i++) {
3466  WORD *e = &*instr.begin() + instr_idx[i];
3467  if (*(e+1) != OPER_ADD) continue;
3468 
3469  // count symbol occurrences
3470  map<WORD,WORD> cnt; // 1-indexed, <0:EXTRASYMBOL, >0:SYMBOL
3471 
3472  for (WORD *t=e+3; *t!=0; t+=*t) {
3473  if (*t==ABS(*(t+*t-1))+1) continue;
3474 
3475  // count symbols in t
3476  if (*(t+4)==1)
3477  cnt[(*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1)]++;
3478 
3479  // count symbols in extrasymbols of t
3480  if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3481  WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3482  if (*(t2+1) != OPER_MUL) continue;
3483  for (t2+=3; *t2!=0; t2+=*t2) {
3484  if (*t2 == ABS(*(t2+*t2-1))+1) continue;
3485  if (*(t2+4)==1)
3486  cnt[(*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1)]++;
3487  }
3488  }
3489  }
3490 
3491  // find most-occurring symbol
3492  WORD x=0, best=0;
3493  for (map<WORD,WORD>::iterator it=cnt.begin(); it!=cnt.end(); it++)
3494  if (it->second > best) { x=it->first; best=it->second; }
3495 
3496  // occurrence>=2 and occurrence>improve, so factorize
3497 
3498  if (best>=2 && best>improve) {
3499  // initialize new equation (Zi from example above)
3500  vector<WORD> new_eqn;
3501  new_eqn.push_back(n);
3502  new_eqn.push_back(OPER_ADD);
3503  new_eqn.push_back(0); // length
3504 
3505  WORD dt;
3506  WORD *newt=e+3;
3507  for (WORD *t=e+3; *t!=0; t+=dt) {
3508  dt = *t;
3509  bool keep=true;
3510 
3511  if (*t!=ABS(*(t+*t-1))+1) {
3512 
3513  // factorized symbol is in t itself
3514  if (*(t+4)==1) {
3515  WORD y = (*(t+1)==SYMBOL ? 1 : -1) * (*(t+3)+1);
3516  if (y==x) {
3517  new_eqn.push_back(*t-4);
3518  new_eqn.insert(new_eqn.end(), t+5, t+dt);
3519  keep=false;
3520  }
3521  }
3522 
3523  // look in extrasymbol of t with ref.count=1
3524  if (*(t+1)==EXTRASYMBOL && *(t+4)==1 && numpar[*(t+3)]==1) {
3525  WORD *t2 = &*instr.begin() + instr_idx[*(t+3)];
3526  if (*(t2+1) == OPER_MUL) {
3527  bool has_x=false;
3528  for (t2+=3; *t2!=0; t2+=*t2) {
3529  if (*t2 == ABS(*(t2+*t2-1))+1) continue;
3530  WORD y = (*(t2+1)==SYMBOL ? 1 : -1) * (*(t2+3)+1);
3531  // extrasymbol has factorized symbol
3532  if (y==x && *(t2+4)==1) {
3533  has_x=true;
3534  // copy remaining part
3535  WORD *tend=t2+*t2;
3536  WORD sign = SGN(*(tend-1));
3537  while (*tend!=0) tend+=*tend;
3538  int dt2 = tend - (t2+*t2);
3539  memmove(t2, t2+*t2, (dt2+1)*sizeof(WORD));
3540  t2 += dt2;
3541  *(t2-1) *= sign;
3542  break;
3543  }
3544  }
3545  if (has_x) {
3546  // extrasymbol has x, so add it to new equation
3547  keep=false;
3548  int thisidx=new_eqn.size();
3549  new_eqn.insert(new_eqn.end(), t, t+dt);
3550  t2 = &*instr.begin() + instr_idx[*(t+3)] + 3;
3551  // if becomes trivial, substitute the term
3552  if (*(t2+*t2)==0) {
3553  // it's a number
3554  if (*t2 == ABS(*(t2+*t2-1))+1) {
3555  if (ABS(new_eqn[new_eqn.size()-1])==3 && new_eqn[new_eqn.size()-2]==1 && new_eqn[new_eqn.size()-3]==1) {
3556  // original equation has coefficient of +/-1, so replace it
3557  WORD sign = SGN(new_eqn.back());
3558  new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3559  new_eqn.insert(new_eqn.end(), t2, t2+*t2);
3560  new_eqn.back() *= sign;
3561  *t2 = 0;
3562  }
3563  else {
3564  // two non-trivial coefficients, so multiply them
3565  // note: untested code (found no way to trigger it)
3566  UWORD *tmp = NumberMalloc("partial_factorize");
3567  WORD ntmp=0;
3568  MulRat(BHEAD (UWORD *)t2+*t2-ABS(*(t2+*t2-1)), *(t2+*t2-1),
3569  (UWORD *)&*(new_eqn.end()-ABS(new_eqn.back())), new_eqn.back(),
3570  tmp, &ntmp);
3571  new_eqn.erase(new_eqn.begin()+thisidx, new_eqn.end());
3572  new_eqn.push_back(ABS(ntmp)+1);
3573  new_eqn.insert(new_eqn.end(), tmp, tmp+ABS(ntmp));
3574  NumberFree(tmp,"partial_factorize");
3575  *t2 = 0;
3576  }
3577  }
3578  else if (*(t2+4)==1) {
3579  // it's a variable
3580  new_eqn.back() *= SGN(*(t2+*t2-1));
3581  new_eqn[thisidx+1] = *(t2+1);
3582  new_eqn[thisidx+2] = *(t2+2);
3583  new_eqn[thisidx+3] = *(t2+3);
3584  new_eqn[thisidx+4] = *(t2+4);
3585  *t2 = 0;
3586  }
3587  }
3588  }
3589  }
3590  }
3591  }
3592 
3593  // no x, so copy it
3594  if (keep) {
3595  memmove(newt, t, dt*sizeof(WORD));
3596  newt += dt;
3597  }
3598  }
3599 
3600  // finalize new equation
3601  new_eqn.push_back(0);
3602  new_eqn[2] = new_eqn.size();
3603 
3604  bool empty = newt == e+3;
3605  if ( n+1 >= nmax ) {
3606  int i, newnmax = nmax*2;
3607  WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
3608  for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3609  for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3610  M_free(numpar,"numpar");
3611  numpar = newnumpar;
3612  nmax = newnmax;
3613  }
3614 // numpar.push_back(0);
3615  n++;
3616 
3617  // if original is not empty, add new equation (Zj) to it
3618  // otherwise replace it later
3619  if (!empty) {
3620  *newt++ = 8;
3621  *newt++ = EXTRASYMBOL;
3622  *newt++ = 4;
3623  *newt++ = n;
3624  *newt++ = 1;
3625  *newt++ = 1;
3626  *newt++ = 1;
3627  *newt++ = 3;
3628  *newt++ = 0;
3629  }
3630 
3631  // add new equation to instructions
3632  instr_idx.push_back(instr.size());
3633  instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3634 
3635  // generate another new equation (Zj=x*Zi)
3636  new_eqn.clear();
3637  new_eqn.push_back(n);
3638  new_eqn.push_back(OPER_MUL);
3639  new_eqn.push_back(20);
3640  new_eqn.push_back(8);
3641 
3642  // add factorized symbol
3643  if (x>0) {
3644  new_eqn.push_back(SYMBOL);
3645  new_eqn.push_back(4);
3646  new_eqn.push_back(x-1);
3647  new_eqn.push_back(1);
3648  }
3649  else {
3650  new_eqn.push_back(EXTRASYMBOL);
3651  new_eqn.push_back(4);
3652  new_eqn.push_back(-x-1);
3653  new_eqn.push_back(1);
3654  }
3655  new_eqn.push_back(1);
3656  new_eqn.push_back(1);
3657  new_eqn.push_back(3);
3658  new_eqn.push_back(8);
3659  // add new equation (Zi)
3660  new_eqn.push_back(EXTRASYMBOL);
3661  new_eqn.push_back(4);
3662  new_eqn.push_back(n-1);
3663  new_eqn.push_back(1);
3664  new_eqn.push_back(1);
3665  new_eqn.push_back(1);
3666  new_eqn.push_back(3);
3667  new_eqn.push_back(0);
3668 
3669  if (!empty) {
3670  // add new equation (Zj) to instructions
3671  instr_idx.push_back(instr.size());
3672  instr.insert(instr.end(), new_eqn.begin(), new_eqn.end());
3673  if ( n+1 >= nmax ) {
3674  int i, newnmax = nmax*2;
3675  WORD *newnumpar = (WORD *)Malloc1(newnmax*sizeof(WORD),"newnumpar");
3676  for ( i = 0; i < n; i++ ) newnumpar[i] = numpar[i];
3677  for ( ; i < newnmax; i++ ) newnumpar[i] = 0;
3678  M_free(numpar,"numpar");
3679  numpar = newnumpar;
3680  nmax = newnmax;
3681  }
3682 // numpar.push_back(0);
3683  n++;
3684  }
3685  else {
3686  // replace e with Zj
3687  e = &*instr.begin() + instr_idx[i];
3688  e[1] = OPER_MUL;
3689  memcpy(e+3, &new_eqn[3], (new_eqn.size()-3)*sizeof(WORD));
3690  }
3691 
3692  // decrease i, so this expression is factorized again if possible
3693  i--;
3694  }
3695  }
3696 
3697 #ifdef DEBUG_GREEDY
3698  MesPrint ("*** [%s, w=%w] DONE: partial_factorize (n=%d)", thetime_str().c_str(), n);
3699 #endif
3700  M_free(numpar,"numpar");
3701  return n;
3702 }
3703 
3704 /*
3705  #] partial_factorize :
3706  #[ optimize_greedy :
3707 */
3708 
3727 vector<WORD> optimize_greedy (vector<WORD> instr, LONG time_limit) {
3728 
3729 #ifdef DEBUG
3730  int old_num_oper = count_operators(instr);
3731  MesPrint ("*** [%s, w=%w] CALL: optimize_greedy(numoper=%d)",
3732  thetime_str().c_str(), old_num_oper);
3733 #endif
3734 
3735  LONG start_time = TimeWallClock(1);
3736 
3737  WORD *ebegin = &*instr.begin();
3738  WORD *eend = ebegin+instr.size();
3739 
3740  // store final equation, since it must be the last equation later
3741  int final_eqn_idx = 0;
3742  int next_eqn = 0;
3743 
3744  for (WORD *e=ebegin; e!=eend; e+=*(e+2)) {
3745  next_eqn = *e + 1;
3746  final_eqn_idx = e-ebegin;
3747  }
3748  // optimize instructions
3749  while (TimeWallClock(1)-start_time < time_limit) {
3750  int old_next_eqn = next_eqn;
3751 
3752  // find optimizations
3753  vector<optimization> optim = find_optimizations(instr);
3754 
3755  // add_eqnidxs contains modified equations, which might have to be updated later again
3756  vector<int> add_eqnidxs;
3757 
3758  // number of optimizations to do
3759  int num_do_optims = max(AO.Optimize.greedyminnum, (int)optim.size()*AO.Optimize.greedymaxperc/100);
3760 
3761  // if best improvement is one, do all optimizations
3762  int best_improve=0;
3763  for (int i=0; i<(int)optim.size(); i++)
3764  best_improve = max(best_improve, optim[i].improve);
3765  if (best_improve <= 1)
3766  num_do_optims = MAXPOSITIVE;
3767  // do a number of optimizations
3768  while (optim.size() > 0 && num_do_optims-- > 0) {
3769 
3770  // find best optimization
3771  int best=0;
3772  best_improve=0;
3773  for (int i=0; i<(int)optim.size(); i++)
3774  if (optim[i].improve > best_improve) {
3775  best=i;
3776  best_improve=optim[i].improve;
3777  }
3778 
3779  // add extra equations
3780  for (int i=0; i<(int)add_eqnidxs.size(); i++)
3781  optim[best].eqnidxs.push_back(add_eqnidxs[i]);
3782 
3783  // do optimization, update next_eqn if successful
3784  int next_idx = instr.size();
3785  if (do_optimization(optim[best], instr, next_eqn)) {
3786  next_eqn++;
3787  add_eqnidxs.push_back(next_idx);
3788  }
3789 
3790  optim.erase(optim.begin()+best);
3791  }
3792 
3793  // partially factorize with improve >= best_improve
3794  next_eqn = partial_factorize(instr, next_eqn, best_improve);
3795 
3796  // check whether nothing has changed
3797  if (next_eqn == old_next_eqn) break;
3798  }
3799 
3800  // add final equation to the back (must be by definition)
3801  instr.push_back(next_eqn);
3802  instr.insert(instr.end(), instr.begin()+final_eqn_idx+1, instr.begin()+final_eqn_idx+instr[final_eqn_idx+2]);
3803 
3804  // removed original final equation
3805  instr[final_eqn_idx+3] = 0;
3806 
3807  // remove extra zeroes
3808  WORD *t = &instr[0];
3809 
3810  ebegin = &*instr.begin();
3811  eend = ebegin+instr.size();
3812  int de=0;
3813 
3814  for (WORD *e=ebegin; e!=eend; e+=de) {
3815  de = *(e+2);
3816  int n=3;
3817  while (*(e+n) != 0) n+=*(e+n);
3818  n++;
3819  memmove (t, e, n*sizeof(WORD));
3820  *(t+2) = n;
3821  t += n;
3822  }
3823 
3824  instr.resize(t - &instr[0]);
3825 
3826 #ifdef DEBUG
3827  MesPrint ("*** [%s, w=%w] DONE: optimize_greedy(numoper=%d) : numoper=%d",
3828  thetime_str().c_str(), old_num_oper, count_operators(instr));
3829 #endif
3830 
3831  return instr;
3832 }
3833 
3834 /*
3835  #] optimize_greedy :
3836  #[ recycle_variables :
3837 */
3838 
3867 vector<WORD> recycle_variables (const vector<WORD> &all_instr) {
3868 
3869 #ifdef DEBUG_MORE
3870  MesPrint ("*** [%s, w=%w] CALL: recycle_variables", thetime_str().c_str());
3871 #endif
3872 
3873  // get starting positions of instructions
3874  vector<const WORD *> instr;
3875  const WORD *tbegin = &*all_instr.begin();
3876  const WORD *tend = tbegin+all_instr.size();
3877  for (const WORD *t=tbegin; t!=tend; t+=*(t+2))
3878  instr.push_back(t);
3879  int n = instr.size();
3880 
3881  // determine with expressions are connected, how many intermediate
3882  // are needed (assuming it's a expression tree instead of a DAG) and
3883  // sort the leaves such that you need a minimal number of variables
3884  vector<int> vars_needed(n);
3885  vector<bool> vis(n,false);
3886  vector<vector<int> > conn(n);
3887 
3888  stack<int> s;
3889  s.push(n);
3890 
3891  while (!s.empty()) {
3892  int i=s.top(); s.pop();
3893  if (i>0) {
3894  i--;
3895  if (vis[i]) continue;
3896  vis[i]=true;
3897  s.push(-(i+1));
3898 
3899  // find all connections
3900  for (const WORD *t=instr[i]+3; *t!=0; t+=*t)
3901  if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL) {
3902  int k = *(t+3);
3903  conn[i].push_back(k);
3904  s.push(k+1);
3905  }
3906  }
3907  else {
3908  i=-i-1;
3909 
3910  // sort the childs w.r.t. needed variables
3911  vector<pair<int,int> > need;
3912  for (int j=0; j<(int)conn[i].size(); j++)
3913  need.push_back(make_pair(vars_needed[conn[i][j]], conn[i][j]));
3914 
3915  // keep the comma expression in proper order
3916  if (*(instr[i]+1) != OPER_COMMA)
3917  sort(need.rbegin(), need.rend());
3918 
3919  vars_needed[i] = 1;
3920  for (int j=0; j<(int)need.size(); j++) {
3921  vars_needed[i] = max(vars_needed[i], need[j].first+j);
3922  conn[i][j] = need[j].second;
3923  }
3924  }
3925  }
3926 
3927  // order the instructions in depth-first order and determine the first
3928  // and last occurrences of variables
3929  vector<int> order, first(n,0), last(n,0);
3930  vis = vector<bool>(n,false);
3931  s.push(n);
3932 
3933  while (!s.empty()) {
3934 
3935  int i=s.top(); s.pop();
3936 
3937  if (i>0) {
3938  i--;
3939  if (vis[i]) continue;
3940  vis[i]=true;
3941  s.push(-(i+1));
3942  for (int j=(int)conn[i].size()-1; j>=0; j--)
3943  s.push(conn[i][j]+1);
3944  }
3945  else {
3946  i=-i-1;
3947  first[i] = last[i] = order.size();
3948  order.push_back(i);
3949  for (int j=0; j<(int)conn[i].size(); j++) {
3950  int k = conn[i][j];
3951  last[k] = max(last[k], first[i]);
3952  }
3953  }
3954  }
3955 
3956  // find the renumbering to recycled variables, where at any time the
3957  // lowest-indexed variable that can be used is chosen
3958  int numvar=0;
3959  set<int> var;
3960  vector<int> renum(n);
3961 
3962  for (int i=0; i<(int)order.size(); i++) {
3963  for (int j=0; j<(int)conn[order[i]].size(); j++) {
3964  int k = conn[order[i]][j];
3965  if (last[k] == i) var.insert(renum[k]);
3966  }
3967 
3968  if (var.empty()) var.insert(numvar++);
3969  renum[order[i]] = *var.begin(); var.erase(var.begin());
3970  }
3971 
3972  // put the number of variables used in a preprocessor variable
3973 
3974  // generate new instructions with the renumbering
3975  vector<WORD> newinstr;
3976 
3977  for (int i=0; i<(int)order.size(); i++) {
3978  int x = order[i];
3979  int j = newinstr.size();
3980  newinstr.insert(newinstr.end(), instr[x], instr[x]+*(instr[x]+2));
3981 
3982  newinstr[j] = renum[newinstr[j]];
3983 
3984  for (WORD *t=&newinstr[j+3]; *t!=0; t+=*t)
3985  if (*t!=1+ABS(*(t+*t-1)) && *(t+1)==EXTRASYMBOL)
3986  *(t+3) = renum[*(t+3)];
3987  }
3988 
3989 #ifdef DEBUG_MORE
3990  MesPrint ("*** [%s, w=%w] DONE: recycle_variables", thetime_str().c_str());
3991 #endif
3992 
3993  return newinstr;
3994 }
3995 
3996 /*
3997  #] recycle_variables :
3998  #[ optimize_expression_given_Horner :
3999 */
4000 
4015 
4016 #ifdef DEBUG
4017  MesPrint ("*** [%s, w=%w] CALL: optimize_expression_given_Horner", thetime_str().c_str());
4018 #endif
4019 
4020  GETIDENTITY;
4021 
4022  // initialize timer
4023  LONG start_time = TimeWallClock(1);
4024  LONG time_limit = 100 * AO.Optimize.greedytimelimit / (AO.Optimize.horner == O_MCTS ? AO.Optimize.mctsnumkeep : 1);
4025  if (time_limit == 0) time_limit=MAXPOSITIVE;
4026 
4027  // pick a Horner scheme from the list
4028  LOCK(optimize_lock);
4029  vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4030  optimize_best_Horner_schemes.pop_back();
4031  UNLOCK(optimize_lock);
4032 
4033 // if ( ( AO.Optimize.debugflags&2 ) == 2 ) {
4034 // MesPrint ("Scheme: %a",Horner_scheme.size(),&(Horner_scheme[0]));
4035 // }
4036 
4037  // apply Horner scheme
4038  vector<WORD> tree = Horner_tree(optimize_expr, Horner_scheme);
4039 
4040  // generate instructions, eventually with CSE
4041  vector<WORD> instr;
4042 
4043  if (AO.Optimize.method == O_CSE || AO.Optimize.method == O_CSEGREEDY)
4044  instr = generate_instructions(tree, true);
4045  else
4046  instr = generate_instructions(tree, false);
4048  if (AO.Optimize.method == O_CSEGREEDY || AO.Optimize.method == O_GREEDY) {
4049  instr = merge_operators(instr, false);
4050  instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
4051  instr = merge_operators(instr, true);
4052  instr = optimize_greedy(instr, time_limit-(TimeWallClock(1)-start_time));
4053  }
4054  instr = merge_operators(instr, true);
4055 
4056  // recycle the temporary variables
4057  instr = recycle_variables(instr);
4058 
4059  // determine the quality of the code and possibly update the best code
4060  int num_oper = count_operators(instr);
4061 
4062  LOCK(optimize_lock);
4063  if (num_oper < optimize_best_num_oper) {
4064  optimize_num_vars = Horner_scheme.size();
4065  optimize_best_num_oper = num_oper;
4066  optimize_best_instr = instr;
4067  optimize_best_vars = vector<WORD>(AN.poly_vars, AN.poly_vars+AN.poly_num_vars);
4068  }
4069  UNLOCK(optimize_lock);
4070 
4071  // clean poly_vars, that are allocated by Horner_tree
4072  AN.poly_num_vars = 0;
4073  M_free(AN.poly_vars,"poly_vars");
4074 
4075 #ifdef DEBUG
4076  MesPrint ("*** [%s, w=%w] DONE: optimize_expression_given_Horner", thetime_str().c_str());
4077 #endif
4078 }
4079 
4080 /*
4081  #] optimize_expression_given_Horner :
4082  #[ PF_optimize_expression_given_Horner :
4083 */
4084 #ifdef WITHMPI
4085 
4086 // Initialization.
4087 void PF_optimize_expression_given_Horner_master_init () {
4088  // Nothing to do for now.
4089 }
4090 
4091 // Wait for an idle slave and return the process number.
4092 int PF_optimize_expression_given_Horner_master_next() {
4093 
4094  // Find an idle slave.
4095  int next;
4096  PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_HORNER_MSGTAG, &next, NULL);
4097  return next;
4098 
4099 }
4100 
4101 // The main function on the master.
4102 void PF_optimize_expression_given_Horner_master () {
4103 
4104 #ifdef DEBUG
4105  MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4106 #endif
4107 
4108  // pick a Horner scheme from the list
4109  vector<WORD> Horner_scheme = optimize_best_Horner_schemes.back();
4110  optimize_best_Horner_schemes.pop_back();
4111 
4112  // Find an idle slave.
4113  int next = PF_optimize_expression_given_Horner_master_next();
4114 
4115  // Send a new task to the slave.
4117  int len = Horner_scheme.size();
4118  PF_LongSinglePack(&len, 1, PF_INT);
4119  PF_LongSinglePack(&Horner_scheme[0], len, PF_WORD);
4120  PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4121 
4122 #ifdef DEBUG
4123  MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master", thetime_str().c_str());
4124 #endif
4125 
4126 }
4127 
4128 // Wait for all the slaves to finish their tasks.
4129 void PF_optimize_expression_given_Horner_master_wait () {
4130 
4131 #ifdef DEBUG
4132  MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4133 #endif
4134 
4135  // Wait for all the slaves.
4136  for (int i = 1; i < PF.numtasks; i++) {
4137  int next = PF_optimize_expression_given_Horner_master_next();
4138  // Send a null task.
4140  int len = 0;
4141  PF_LongSinglePack(&len, 1, PF_INT);
4142  PF_LongSingleSend(next, PF_OPT_HORNER_MSGTAG);
4143  }
4144 
4145  // Combine the result from all the slaves.
4146  optimize_best_num_oper = INT_MAX;
4147  for (int i = 1; i < PF.numtasks; i++) {
4148  PF_LongSingleReceive(PF_ANY_SOURCE, PF_OPT_COLLECT_MSGTAG, NULL, NULL);
4149 
4150  int len;
4151 
4152  // The first integer is the number of operations.
4153  PF_LongSingleUnpack(&len, 1, PF_INT);
4154 
4155  if (len >= optimize_best_num_oper) continue;
4156 
4157  // Update the best result.
4158  optimize_best_num_oper = len;
4159  PF_LongSingleUnpack(&len, 1, PF_INT);
4160  optimize_best_instr.resize(len);
4161  PF_LongSingleUnpack(&optimize_best_instr[0], len, PF_WORD);
4162  PF_LongSingleUnpack(&len, 1, PF_INT);
4163  optimize_best_vars.resize(len);
4164  PF_LongSingleUnpack(&optimize_best_vars[0], len, PF_WORD);
4165 
4166  optimize_num_vars = optimize_best_vars.size(); // TODO
4167  }
4168 
4169 #ifdef DEBUG
4170  MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_master_wait", thetime_str().c_str());
4171 #endif
4172 
4173 }
4174 
4175 // The main function on the slaves.
4176 void PF_optimize_expression_given_Horner_slave () {
4177 
4178 #ifdef DEBUG
4179  MesPrint ("*** [%s, w=%w] CALL: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4180 #endif
4181 
4182  optimize_best_Horner_schemes.clear();
4183  optimize_best_num_oper = INT_MAX;
4184 
4185  int dummy = 0;
4186  int len;
4187 
4188  for (;;) {
4189  // Ask the master for the next task.
4191  PF_LongSinglePack(&dummy, 1, PF_INT);
4192  PF_LongSingleSend(MASTER, PF_OPT_HORNER_MSGTAG);
4193 
4194  // Get a task from the master.
4195  PF_LongSingleReceive(MASTER, PF_OPT_HORNER_MSGTAG, NULL, NULL);
4196 
4197  // Length of the task.
4198  PF_LongSingleUnpack(&len, 1, PF_INT);
4199 
4200  // No task remains.
4201  if (len == 0) break;
4202 
4203  // Perform the given task.
4204  optimize_best_Horner_schemes.push_back(vector<WORD>());
4205  vector<WORD> &Horner_scheme = optimize_best_Horner_schemes.back();
4206  Horner_scheme.resize(len);
4207  PF_LongSingleUnpack(&Horner_scheme[0], len, PF_WORD);
4209  }
4210 
4211  // Send the result to the master.
4213  PF_LongSinglePack(&optimize_best_num_oper, 1, PF_INT);
4214  if (optimize_best_num_oper != INT_MAX) {
4215  len = optimize_best_instr.size();
4216  PF_LongSinglePack(&len, 1, PF_INT);
4217  PF_LongSinglePack(&optimize_best_instr[0], len, PF_WORD);
4218  len = optimize_best_vars.size();
4219  PF_LongSinglePack(&len, 1, PF_INT);
4220  PF_LongSinglePack(&optimize_best_vars[0], len, PF_WORD);
4221  }
4222  PF_LongSingleSend(MASTER, PF_OPT_COLLECT_MSGTAG);
4223 
4224 #ifdef DEBUG
4225  MesPrint ("*** [%s, w=%w] DONE: PF_optimize_expression_given_Horner_slave", thetime_str().c_str());
4226 #endif
4227 
4228 }
4229 
4230 #endif
4231 /*
4232  #] PF_optimize_expression_given_Horner :
4233  #[ generate_output :
4234 */
4235 
4245 VOID generate_output (const vector<WORD> &instr, int exprnr, int extraoffset, const vector<vector<WORD> > &brackets) {
4246 
4247 #ifdef DEBUG
4248  MesPrint ("*** [%s, w=%w] CALL: generate_output", thetime_str().c_str());
4249 #endif
4250 
4251  GETIDENTITY;
4252  vector<WORD> output;
4253 
4254  // one-indexed instead of zero-indexed
4255  extraoffset++;
4256  int num = 0;
4257  int maxsize = (int)instr.size();
4258  for (int i=0; i<maxsize; i+=instr[i+2]) num++;
4259  int *tstart = (int *)Malloc1(num*sizeof(int),"nplaces");
4260  num = 0;
4261  for (int i=0; i<maxsize; i+=instr[i+2]) tstart[num++] = i;
4262  for (int j=0; j<num; j++) {
4263  int i = tstart[j];
4264 
4265  // copy arguments
4266  WCOPY(AT.WorkPointer, &instr[i+3], (instr[i+2]-3));
4267 
4268  // update maximal variable number
4269  AO.OptimizeResult.maxvar = MaX(AO.OptimizeResult.maxvar, instr[i]+extraoffset);
4270 
4271  // renumber symbols and extrasymbols to correct values
4272  for (WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4273  if (*t == ABS(*(t+*t-1))+1) continue;
4274  if (*(t+1)==SYMBOL) *(t+3) = optimize_best_vars[*(t+3)];
4275  if (*(t+1)==EXTRASYMBOL) {
4276  *(t+1) = SYMBOL;
4277  *(t+3) = MAXVARIABLES-*(t+3)-extraoffset;
4278  }
4279  }
4280 
4281  // reformat multiplication instructions, since their current
4282  // format is "expr.nr OPER_MUL length arguments", which differs
4283  // from Form's format for a product of symbols
4284  if (instr[i+1]==OPER_MUL) {
4285 
4286  WORD *now=AT.WorkPointer+1;
4287  int dt;
4288  bool coeff=false;
4289  int coeff_sign=1;
4290 
4291  for (WORD *t=AT.WorkPointer; *t!=0; t+=dt) {
4292  dt = *t;
4293  if (*t == ABS(*(t+*t-1))+1) {
4294  // copy coefficient
4295  memmove(AT.WorkPointer+instr[i+2], t, *t*sizeof(WORD));
4296  coeff = true;
4297  }
4298  else {
4299  // move symbol
4300  int n = *(t+2);
4301  memmove(now, t+1, n*sizeof(WORD));
4302  now += n;
4303  coeff_sign *= SGN(*(t+dt-1));
4304  }
4305  }
4306 
4307  if (coeff) {
4308  // add existing coefficient
4309  int n = *(AT.WorkPointer + instr[i+2]) - 1;
4310  memmove(now, AT.WorkPointer+instr[i+2]+1, n*sizeof(WORD));
4311  now += n;
4312  }
4313  else {
4314  // add coefficient of one
4315  *now++=1;
4316  *now++=1;
4317  *now++=3;
4318  }
4319 
4320  *(now-1) *= coeff_sign;
4321 
4322  *AT.WorkPointer = now - AT.WorkPointer;
4323  *now++ = 0;
4324  }
4325 
4326  // in the case of simultaneous optimization of expressions, add the
4327  // brackets to the final expression
4328  if (instr[i+1]==OPER_COMMA) {
4329  WORD *start = AT.WorkPointer + instr[i+2];
4330  WORD *now = start;
4331  int b=0;
4332  for (const WORD *t=AT.WorkPointer; *t!=0; t+=*t) {
4333  if ( ( brackets[b].size() != 0 ) && ( brackets[b][0] == 0 ) ) break;
4334  *now++ = *t + brackets[b].size();
4335  if (brackets[b].size() != 0) {
4336  memcpy(now, &brackets[b][0], brackets[b].size()*sizeof(WORD));
4337  now += brackets[b].size();
4338  }
4339  memcpy(now, t+1, (*t-1)*sizeof(WORD));
4340  now += *t-1;
4341  b++;
4342  }
4343  *now++ = 0;
4344  memmove(AT.WorkPointer, start, (now-start)*sizeof(WORD));
4345  }
4346 
4347  // add the number of the extra symbol; if it is the last one,
4348  // replace the extra symbol number with the expression number
4349  if (i+instr[i+2]<(int)instr.size())
4350  output.push_back(instr[i]+extraoffset);
4351  else {
4352  output.push_back(-(exprnr+1));
4353  }
4354 
4355  // add code for this symbol
4356  int n=0;
4357  while (*(AT.WorkPointer+n)!=0)
4358  n += *(AT.WorkPointer+n);
4359  n++;
4360  output.insert(output.end(), AT.WorkPointer, AT.WorkPointer+n);
4361  }
4362 
4363  // trailing zero
4364  output.push_back(0);
4365 
4366  // clear buffer
4367  if (AO.OptimizeResult.code != NULL)
4368  M_free(AO.OptimizeResult.code, "optimize output");
4369 
4370  M_free(tstart,"nplaces");
4371 
4372  // copy buffer
4373  AO.OptimizeResult.codesize = output.size();
4374  AO.OptimizeResult.code = (WORD *)Malloc1(output.size()*sizeof(WORD), "optimize output");
4375  memcpy(AO.OptimizeResult.code, &output[0], output.size()*sizeof(WORD));
4376 
4377 #ifdef DEBUG
4378  MesPrint ("*** [%s, w=%w] DONE: generate_output", thetime_str().c_str());
4379 #endif
4380 }
4381 
4382 /*
4383  #] generate_output :
4384  #[ generate_expression :
4385 */
4386 
4394 WORD generate_expression (WORD exprnr) {
4395 
4396 #ifdef DEBUG
4397  MesPrint ("*** [%s, w=%w] CALL: generate_expression", thetime_str().c_str());
4398 #endif
4399 
4400  GETIDENTITY;
4401 
4402  WORD *oldWorkPointer = AT.WorkPointer;
4403 
4404  CBUF *C = cbuf+AC.cbufnum;
4405  WORD *term = AT.WorkPointer, oldcurexpr = AR.CurExpr;
4406  POSITION position;
4407  EXPRESSIONS e = Expressions+exprnr;
4408  SetScratch(AR.infile,&(e->onfile));
4409 
4410  if ( GetTerm(BHEAD term) <= 0 ) {
4411  MesPrint("Expression %d has problems in scratchfile",exprnr);
4412  Terminate(-1);
4413  }
4414  SeekScratch(AR.outfile,&position);
4415  e->onfile = position;
4416  if ( PutOut(BHEAD term,&position,AR.outfile,0) < 0 ) {
4417  MesPrint("Expression %d has problems in output scratchfile",exprnr);
4418  Terminate(-1);
4419  }
4420 
4421  AR.CurExpr = exprnr;
4422  NewSort(BHEAD0);
4423 
4424  // scan for the original expression (marked by *t<0) and give the
4425  // terms to Generator
4426  WORD *t = AO.OptimizeResult.code;
4427  {
4428  WORD old = AR.Cnumlhs; AR.Cnumlhs = 0;
4429  while (*t!=0) {
4430  bool is_expr = *t < 0;
4431  t++;
4432  while (*t!=0) {
4433  if (is_expr) {
4434  memcpy(AT.WorkPointer, t, *t*sizeof(WORD));
4435  Generator(BHEAD AT.WorkPointer, C->numlhs);
4436  }
4437  t+=*t;
4438  }
4439  t++;
4440  }
4441  AR.Cnumlhs = old;
4442  }
4443 
4444  // final sorting
4445  if (EndSort(BHEAD NULL,0) < 0) {
4446  LowerSortLevel();
4447  Terminate(-1);
4448  }
4449 
4450  AT.WorkPointer = oldWorkPointer;
4451  AR.CurExpr = oldcurexpr;
4452 
4453 #ifdef DEBUG
4454  MesPrint ("*** [%s, w=%w] DONE: generate_expression", thetime_str().c_str());
4455 #endif
4456 
4457  return 0;
4458 }
4459 
4460 /*
4461  #] generate_expression :
4462  #[ optimize_print_code :
4463 */
4464 
4474 VOID optimize_print_code (int print_expr) {
4475 
4476 #ifdef DEBUG
4477  MesPrint ("*** [%s, w=%w] CALL: optimize_print_code", thetime_str().c_str());
4478 #endif
4479  if ( ( AO.Optimize.debugflags & 1 ) != 0 ) {
4480 /*
4481  * The next code is for debugging purposes. We may want the statements
4482  * in reverse order to substitute them all back.
4483  * Jan used a Mathematica program to do this. Here we make that
4484  * Format Ox,debugflag=1;
4485  * Creates reverse order during printing.
4486  * All we have to do is put id in front of the statements. This is done
4487  * in PrintExtraSymbol.
4488  */
4489  WORD *t = AO.OptimizeResult.code;
4490  WORD num = 0; // First we count the number of objects.
4491  while (*t!=0) {
4492  num++;
4493  t++; while (*t!=0) t+=*t; t++;
4494  }
4495  WORD **tstart = (WORD **)Malloc1(num*sizeof(WORD *),"print objects");
4496  t = AO.OptimizeResult.code; num = 0; // Now we get the addresses
4497  while (*t!=0) {
4498  tstart[num++] = t;
4499  t++; while (*t!=0) t+=*t; t++;
4500  }
4501  // Flip the addresses
4502  int halfnum = num/2;
4503  for (int i=0; i<halfnum; i++) { swap(tstart[i],tstart[num-1-i]); }
4504  for ( int i = 0; i < num; i++ ) {
4505  t = tstart[i];
4506  if (*t > 0)
4507  PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4508  else if (print_expr)
4509  PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4510  }
4511  CBUF *C = cbuf + AM.sbufnum;
4512  if (C->numrhs >= AO.OptimizeResult.minvar)
4513  PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4514  }
4515  else {
4516  // print extra symbols from ConvertToPoly in optimization
4517  CBUF *C = cbuf + AM.sbufnum;
4518  if (C->numrhs >= AO.OptimizeResult.minvar)
4519  PrintSubtermList(AO.OptimizeResult.minvar, C->numrhs);
4520 
4521  WORD *t = AO.OptimizeResult.code;
4522  while (*t!=0) {
4523  if (*t > 0) {
4524  PrintExtraSymbol(*t, t+1, EXTRASYMBOL);
4525  }
4526  else if (print_expr)
4527  PrintExtraSymbol(-*t-1, t+1, EXPRESSIONNUMBER);
4528  t++;
4529  while (*t!=0) t+=*t;
4530  t++;
4531  }
4532  }
4533 
4534 #ifdef DEBUG
4535  MesPrint ("*** [%s, w=%w] DONE: optimize_print_code", thetime_str().c_str());
4536 #endif
4537 }
4538 
4539 /*
4540  #] optimize_print_code :
4541  #[ Optimize :
4542 */
4543 
4587 int Optimize (WORD exprnr, int do_print) {
4588 
4589 #if defined(WITHMPI) && (defined(DEBUG) || defined(DEBUG_MORE) || defined(DEBUG_MCTS) || defined(DEBUG_GREEDY))
4590  // set AS.printflag negative temporary.
4591  struct save_printflag {
4592  save_printflag() {
4593  oldprintflag = AS.printflag;
4594  AS.printflag = -1;
4595  }
4596  ~save_printflag() {
4597  AS.printflag = oldprintflag;
4598  }
4599  int oldprintflag;
4600  } save_printflag_;
4601 #endif
4602 
4603 #ifdef DEBUG
4604  MesPrint ("*** [%s, w=%w] CALL: Optimize", thetime_str().c_str());
4605  MesPrint ("*** %"); PrintRunningTime();
4606 #endif
4607 
4608 #ifdef WITHPTHREADS
4609  optimize_lock = dummylock;
4610 #endif
4611 
4612  AO.OptimizeResult.minvar = (cbuf + AM.sbufnum)->numrhs + 1;
4613 
4614  if (get_expression(exprnr) < 0) return -1;
4615  vector<vector<WORD> > brackets = get_brackets();
4616 
4617 #ifdef DEBUG
4618 #ifdef WITHMPI
4619  if (PF.me == MASTER)
4620 #endif
4621  MesPrint ("*** runtime after preparing the expression: %"); PrintRunningTime();
4622 #endif
4623 
4624  if (optimize_expr[0]==0 ||
4625  (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==ABS(optimize_expr[optimize_expr[0]-1])+1) ||
4626  (optimize_expr[optimize_expr[0]]==0 && optimize_expr[0]==8 &&
4627  optimize_expr[5]==1 && optimize_expr[6]==1 && ABS(optimize_expr[7])==3)) {
4628  if (AO.OptimizeResult.code != NULL)
4629  M_free(AO.OptimizeResult.code, "optimize output");
4630 
4631  // zero terms or one trivial term (number or +/-variable), so no
4632  // optimization, so copy expression; special case because without
4633  // operators the optimization crashes
4634  AO.OptimizeResult.code = (WORD *)Malloc1((optimize_expr[0]+3)*sizeof(WORD), "optimize output");
4635  AO.OptimizeResult.code[0] = -(exprnr+1);
4636  memcpy(AO.OptimizeResult.code+1, optimize_expr, (optimize_expr[0]+1)*sizeof(WORD));
4637  AO.OptimizeResult.code[optimize_expr[0]+2] = 0;
4638  }
4639  else {
4640  // find Horner scheme(s)
4641  optimize_best_Horner_schemes.clear();
4642  if (AO.Optimize.horner == O_OCCURRENCE) {
4643  if (AO.Optimize.hornerdirection != O_BACKWARD)
4644  optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, false));
4645  if (AO.Optimize.hornerdirection != O_FORWARD)
4646  optimize_best_Horner_schemes.push_back(occurrence_order(optimize_expr, true));
4647  }
4648  else {
4649  if (AO.Optimize.horner == O_SIMULATED_ANNEALING) {
4650  optimize_best_Horner_schemes.push_back(simulated_annealing());
4651  }
4652  else {
4653  mcts_best_schemes.clear();
4654  if ( AO.inscheme ) {
4655  optimize_best_Horner_schemes.push_back(vector<WORD>(AO.schemenum));
4656  for ( int i=0; i < AO.schemenum; i++ )
4657  optimize_best_Horner_schemes[0][i] = AO.inscheme[i];
4658  }
4659  else {
4660  for ( int i = 0; i < AO.Optimize.mctsnumrepeat; i++ )
4661  find_Horner_MCTS();
4662  // generate results
4663  for (set<pair<int,vector<WORD> > >::iterator i=mcts_best_schemes.begin(); i!=mcts_best_schemes.end(); i++) {
4664  optimize_best_Horner_schemes.push_back(i->second);
4665 #ifdef DEBUG_MCTS
4666  MesPrint ("{%a} -> %d",i->second.size(), &i->second[0], i->first);
4667 #endif
4668  }
4669  }
4670  // clear the tree by making a new empty one.
4671  mcts_root = tree_node();
4672  }
4673  }
4674 #ifdef DEBUG
4675 #ifdef WITHMPI
4676  if (PF.me == MASTER)
4677 #endif
4678  MesPrint ("*** runtime after Horner: %"); PrintRunningTime();
4679 #endif
4680 
4681 #ifdef WITHMPI
4682  if (PF.me == MASTER ) {
4683  PF_optimize_expression_given_Horner_master_init();
4684 #endif
4685 
4686  // find best Horner scheme and results
4687  optimize_best_num_oper = INT_MAX;
4688 
4689  int imax = (int)optimize_best_Horner_schemes.size();
4690 
4691  for (int i=0; i<imax; i++) {
4692 #if defined(WITHPTHREADS)
4693  if (AM.totalnumberofthreads > 1)
4694  optimize_expression_given_Horner_threaded();
4695  else
4696 #elif defined(WITHMPI)
4697  if (PF.numtasks > 1)
4698  PF_optimize_expression_given_Horner_master();
4699  else
4700 #endif
4702  }
4703 
4704 #ifdef WITHMPI
4705  PF_optimize_expression_given_Horner_master_wait();
4706  }
4707  else {
4708  if (PF.numtasks > 1)
4709  PF_optimize_expression_given_Horner_slave();
4710  }
4711 #endif
4712 
4713 #ifdef WITHPTHREADS
4714  MasterWaitAll();
4715 #endif
4716  // format results, then print it (for "Print") or modify
4717  // expression (for "#Optimize")
4718 #ifdef WITHMPI
4719  if (PF.me == MASTER)
4720 #endif
4721  generate_output(optimize_best_instr, exprnr, cbuf[AM.sbufnum].numrhs, brackets);
4722 #ifdef WITHMPI
4723  else {
4724  // non-null dummy code for slaves
4725  if (AO.OptimizeResult.code == NULL) {
4726  AO.OptimizeResult.code = (WORD *)Malloc1(sizeof(WORD), "optimize output");
4727  }
4728  }
4729 #endif
4730  }
4731 
4732 #ifdef WITHMPI
4733  if (PF.me == MASTER) {
4734  PF_PreparePack();
4735  PF_Pack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4736  PF_Pack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4737  }
4738  PF_Broadcast();
4739  if (PF.me != MASTER) {
4740  PF_Unpack(&AO.OptimizeResult.minvar, 1, PF_WORD);
4741  PF_Unpack(&AO.OptimizeResult.maxvar, 1, PF_WORD);
4742  }
4743 #endif
4744 
4745  // set preprocessor variables
4746  char str[100];
4747  snprintf (str,100,"%d",AO.OptimizeResult.minvar);
4748  PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)str,0,1);
4749  snprintf (str,100,"%d",AO.OptimizeResult.maxvar);
4750  PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)str,0,1);
4751 
4752  if (do_print) {
4753 #ifdef WITHMPI
4754  if (PF.me == MASTER)
4755 #endif
4757  ClearOptimize();
4758  }
4759  else {
4760 #ifdef WITHMPI
4761  if (PF.me == MASTER)
4762 #endif
4763  generate_expression(exprnr);
4764  }
4765 
4766 #ifdef WITHMPI
4767  if (PF.me == MASTER) {
4768 #endif
4769 
4770  if ( AO.Optimize.printstats > 0 ) {
4771  char str[20];
4772  MesPrint("");
4773  count_operators(optimize_expr,true);
4774  int numop = count_operators(optimize_best_instr,true);
4775  snprintf(str,20,"%d",numop);
4776  PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
4777  }
4778  else {
4779  char str[20];
4780  int numop = count_operators(optimize_best_instr,false);
4781  snprintf(str,20,"%d",numop);
4782  PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)str,0,1);
4783  }
4784 
4785  if ( ( AO.Optimize.schemeflags&1 ) == 1 ) {
4786  GETIDENTITY
4787  UBYTE *OutScr, *Out, *old1 = AO.OutputLine, *old2 = AO.OutFill;
4788  int i, sym;
4789  AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4790  FiniLine();
4791  OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4792  TokenToLine((UBYTE *)" Scheme selected: ");
4793  for ( i = 0; i < optimize_num_vars; i++ ) {
4794  Out = OutScr;
4795  sym = optimize_best_vars[i];
4796  if ( i > 0 ) TokenToLine((UBYTE *)",");
4797  if ( sym < NumSymbols ) {
4798  StrCopy(FindSymbol(sym),OutScr);
4799 /* StrCopy(VARNAME(symbols,sym),OutScr); */
4800  }
4801  else {
4802  Out = StrCopy((UBYTE *)AC.extrasym,Out);
4803  if ( AC.extrasymbols == 0 ) {
4804  Out = NumCopy((MAXVARIABLES-sym),Out);
4805  Out = StrCopy((UBYTE *)"_",Out);
4806  }
4807  else if ( AC.extrasymbols == 1 ) {
4808  Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4809  }
4810  }
4811  TokenToLine(OutScr);
4812  }
4813  TokenToLine((UBYTE *)";");
4814  FiniLine();
4815  AO.OutFill = old2;
4816  AO.OutputLine = old1;
4817  }
4818 
4819  {
4820  GETIDENTITY
4821  UBYTE *OutScr, *Out, *outstring = 0;
4822  int i, sym;
4823  AO.OutputLine = AO.OutFill = (UBYTE *)AT.WorkPointer;
4824  OutScr = (UBYTE *)AT.WorkPointer + ( TOLONG(AT.WorkTop) - TOLONG(AT.WorkPointer) ) /2;
4825  for ( i = 0; i < optimize_num_vars; i++ ) {
4826  Out = OutScr;
4827  sym = optimize_best_vars[i];
4828  if ( sym < NumSymbols ) {
4829  StrCopy(FindSymbol(sym),OutScr);
4830 /* StrCopy(VARNAME(symbols,sym),OutScr); */
4831  }
4832  else {
4833  Out = StrCopy((UBYTE *)AC.extrasym,Out);
4834  if ( AC.extrasymbols == 0 ) {
4835  Out = NumCopy((MAXVARIABLES-sym),Out);
4836  Out = StrCopy((UBYTE *)"_",Out);
4837  }
4838  else if ( AC.extrasymbols == 1 ) {
4839  Out = AddArrayIndex((MAXVARIABLES-sym),Out);
4840  }
4841  }
4842  outstring = AddToString(outstring,OutScr,1);
4843  }
4844  if ( outstring == 0 ) {
4845  PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)"",0,1);
4846  }
4847  else {
4848  PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)outstring,0,1);
4849  M_free(outstring,"AddToString");
4850  }
4851  }
4852 
4853 #ifdef WITHMPI
4854  }
4855 
4856  // synchronize optimvalue_ and optimscheme_
4857  if ( PF.me == MASTER ) {
4858  UBYTE *value;
4859  int bytes;
4860 
4862 
4863  value = GetPreVar((UBYTE *)"optimvalue_", WITHERROR);
4864  bytes = strlen((char *)value);
4865  PF_LongMultiPack(&bytes, 1, PF_INT);
4866  PF_LongMultiPack(value, bytes, PF_BYTE);
4867 
4868  value = GetPreVar((UBYTE *)"optimscheme_", WITHERROR);
4869  bytes = strlen((char *)value);
4870  PF_LongMultiPack(&bytes, 1, PF_INT);
4871  PF_LongMultiPack(value, bytes, PF_BYTE);
4872  }
4874  if ( PF.me != MASTER ) {
4875  static vector<UBYTE> prevarbuf;
4876  UBYTE *value;
4877  int bytes;
4878 
4879  PF_LongMultiUnpack(&bytes, 1, PF_INT);
4880  prevarbuf.reserve(bytes + 1);
4881  value = &prevarbuf[0];
4882  PF_LongMultiUnpack(value, bytes, PF_BYTE);
4883  value[bytes] = '\0'; // null terminator
4884  PutPreVar((UBYTE *)"optimvalue_", value, NULL, 1);
4885 
4886  PF_LongMultiUnpack(&bytes, 1, PF_INT);
4887  prevarbuf.reserve(bytes + 1);
4888  value = &prevarbuf[0];
4889  PF_LongMultiUnpack(value, bytes, PF_BYTE);
4890  value[bytes] = '\0'; // null terminator
4891  PutPreVar((UBYTE *)"optimscheme_", value, NULL, 1);
4892  }
4893 #endif
4894 
4895  // cleanup
4896  M_free(optimize_expr,"LoadOptim");
4897 
4898 #ifdef DEBUG
4899  MesPrint ("*** [%s, w=%w] DONE: Optimize", thetime_str().c_str());
4900 #endif
4901 
4902  return 0;
4903 }
4904 
4905 /*
4906  #] Optimize :
4907  #[ ClearOptimize :
4908 */
4909 
4925 {
4926  char str[20];
4927  WORD numexpr, *w;
4928  int error = 0;
4929  if ( AO.OptimizeResult.code != NULL ) {
4930  M_free(AO.OptimizeResult.code, "optimize output");
4931  AO.OptimizeResult.code = NULL;
4932  AO.OptimizeResult.codesize = 0;
4933  PutPreVar((UBYTE *)"optimminvar_",(UBYTE *)("0"),0,1);
4934  PutPreVar((UBYTE *)"optimmaxvar_",(UBYTE *)("0"),0,1);
4935  PruneExtraSymbols(AO.OptimizeResult.minvar-1);
4936  cbuf[AM.sbufnum].numrhs = AO.OptimizeResult.minvar-1;
4937  AO.OptimizeResult.minvar = AO.OptimizeResult.maxvar = 0;
4938  }
4939  if ( AO.OptimizeResult.nameofexpr != NULL ) {
4940 /*
4941  We have to pick up the expression by its name. Numbers may change.
4942  Note that this requires that when we overwrite an expression, we
4943  check that it is not an optimized expression. See execute.c and
4944  comexpr.c
4945 */
4946  if ( GetName(AC.exprnames,AO.OptimizeResult.nameofexpr,&numexpr,NOAUTO) != CEXPRESSION ) {
4947  MesPrint("@Internal error while clearing optimized expression %s ",AO.OptimizeResult.nameofexpr);
4948  Terminate(-1);
4949  }
4950  M_free(AO.OptimizeResult.nameofexpr, "optimize expression name");
4951  AO.OptimizeResult.nameofexpr = NULL;
4952  w = &(Expressions[numexpr].status);
4953  *w = SetExprCases(DROP,1,*w);
4954  if ( *w < 0 ) error = 1;
4955  }
4956  snprintf (str,20,"%d",cbuf[AM.sbufnum].numrhs);
4957  PutPreVar(AM.oldnumextrasymbols,(UBYTE *)str,0,1);
4958  PutPreVar((UBYTE *)"optimvalue_",(UBYTE *)("0"),0,1);
4959  PutPreVar((UBYTE *)"optimscheme_",(UBYTE *)("0"),0,1);
4960  return(error);
4961 }
4962 
4963 /*
4964  #] ClearOptimize :
4965 */
int Optimize(WORD exprnr, int do_print)
Definition: optimize.cc:4587
int PF_LongSingleUnpack(void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:1503
int PutPreVar(UBYTE *, UBYTE *, UBYTE *, int)
Definition: pre.c:642
int PF_Pack(const void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:642
int PF_LongSingleReceive(int src, int tag, int *psrc, int *ptag)
Definition: mpi.c:1583
WORD getpower(const WORD *term, int var, int pos)
Definition: optimize.cc:579
vector< WORD > optimize_greedy(vector< WORD > instr, LONG time_limit)
Definition: optimize.cc:3727
void optimize_expression_given_Horner()
Definition: optimize.cc:4014
vector< WORD > merge_operators(const vector< WORD > &all_instr, bool move_coeff)
Definition: optimize.cc:2356
void PF_BroadcastBuffer(WORD **buffer, LONG *length)
Definition: parallel.c:2110
int PF_Unpack(void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:671
vector< WORD > Horner_tree(const WORD *expr, const vector< WORD > &order)
Definition: optimize.cc:852
vector< optimization > find_optimizations(const vector< WORD > &instr)
Definition: optimize.cc:2651
LONG get_expression(int exprnr)
Definition: optimize.cc:204
int PF_LongMultiBroadcast(void)
Definition: mpi.c:1807
bool term_compare(const WORD *a, const WORD *b)
Definition: optimize.cc:831
void fixarg(UWORD *t, WORD &n)
Definition: optimize.cc:593
void build_Horner_tree(const WORD **terms, int numterms, int var, int maxvar, int pos, vector< WORD > *res)
Definition: optimize.cc:631
Definition: structs.h:938
Definition: structs.h:293
void my_random_shuffle(PHEAD RandomAccessIterator fr, RandomAccessIterator to)
Definition: optimize.cc:180
WORD StoreTerm(PHEAD WORD *)
Definition: sort.c:4333
WORD generate_expression(WORD exprnr)
Definition: optimize.cc:4394
vector< WORD > occurrence_order(const WORD *expr, bool rev)
Definition: optimize.cc:498
int ClearOptimize()
Definition: optimize.cc:4924
vector< WORD > recycle_variables(const vector< WORD > &all_instr)
Definition: optimize.cc:3867
LONG TimeWallClock(WORD)
Definition: tools.c:3476
int PF_PrepareLongMultiPack(void)
Definition: mpi.c:1643
int PF_LongSingleSend(int to, int tag)
Definition: mpi.c:1540
int PF_Broadcast(void)
Definition: mpi.c:883
int PF_PreparePack(void)
Definition: mpi.c:624
bool do_optimization(const optimization optim, vector< WORD > &instr, int newid)
Definition: optimize.cc:2884
int PF_LongSinglePack(const void *buffer, size_t count, MPI_Datatype type)
Definition: mpi.c:1469
VOID LowerSortLevel()
Definition: sort.c:4727
VOID optimize_print_code(int print_expr)
Definition: optimize.cc:4474
void find_Horner_MCTS()
Definition: optimize.cc:2208
int partial_factorize(vector< WORD > &instr, int n, int improve)
Definition: optimize.cc:3433
int count_operators(const WORD *expr, bool print=false)
Definition: optimize.cc:401
WORD PutOut(PHEAD WORD *, POSITION *, FILEHANDLE *, WORD)
Definition: sort.c:1405
struct NoDe NODE
int PF_Send(int to, int tag)
Definition: mpi.c:822
WORD NewSort(PHEAD0)
Definition: sort.c:592
WORD Generator(PHEAD WORD *, WORD)
Definition: proces.c:3101
vector< WORD > generate_instructions(const vector< WORD > &tree, bool do_CSE)
Definition: optimize.cc:1086
int PF_PrepareLongSinglePack(void)
Definition: mpi.c:1451
int count_operators_cse(const vector< WORD > &tree)
Definition: optimize.cc:1295
VOID generate_output(const vector< WORD > &instr, int exprnr, int extraoffset, const vector< vector< WORD > > &brackets)
Definition: optimize.cc:4245
int PF_Receive(int src, int tag, int *psrc, int *ptag)
Definition: mpi.c:848
LONG EndSort(PHEAD WORD *, int)
Definition: sort.c:682
vector< vector< WORD > > get_brackets()
Definition: optimize.cc:281