Scippy

SCIP

Solving Constraint Integer Programs

bandit_epsgreedy.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2019 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not visit scip.zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file bandit_epsgreedy.c
17  * @brief implementation of epsilon greedy bandit algorithm
18  * @author Gregor Hendel
19  */
20 
21 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
22 
23 #include "scip/bandit.h"
24 #include "scip/bandit_epsgreedy.h"
25 #include "scip/pub_bandit.h"
26 #include "scip/pub_message.h"
27 #include "scip/pub_misc.h"
28 #include "scip/scip_bandit.h"
29 #include "scip/scip_mem.h"
30 #include "scip/scip_randnumgen.h"
31 
32 #define BANDIT_NAME "eps-greedy"
33 #define EPSGREEDY_SMALL 1e-6
34 
35 /*
36  * Data structures
37  */
38 
39 /** private data structure of epsilon greedy bandit algorithm */
40 struct SCIP_BanditData
41 {
42  SCIP_Real* weights; /**< weights for every action */
43  SCIP_Real* priorities; /**< saved priorities for tie breaking */
44  int* sels; /**< individual number of selections per action */
45  SCIP_Real eps; /**< epsilon parameter (between 0 and 1) to control epsilon greedy */
46  SCIP_Real decayfactor; /**< the factor to reduce the weight of older observations if exponential decay is enabled */
47  int avglim; /**< nonnegative limit on observation number before the exponential decay starts,
48  * only relevant if exponential decay is enabled
49  */
50  int nselections; /**< counter for the number of selection calls */
51  SCIP_Bool preferrecent; /**< should the weights be updated in an exponentially decaying way? */
52 };
53 
54 /*
55  * Callback methods of bandit algorithm virtual function table
56  */
57 
58 /** callback to free bandit specific data structures */
59 SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
60 { /*lint --e{715}*/
61  SCIP_BANDITDATA* banditdata;
62  int nactions;
63 
64  assert(bandit != NULL);
65 
66  banditdata = SCIPbanditGetData(bandit);
67  assert(banditdata != NULL);
68  assert(banditdata->weights != NULL);
69  nactions = SCIPbanditGetNActions(bandit);
70 
71  BMSfreeBlockMemoryArray(blkmem, &banditdata->weights, nactions);
72  BMSfreeBlockMemoryArray(blkmem, &banditdata->priorities, nactions);
73  BMSfreeBlockMemoryArray(blkmem, &banditdata->sels, nactions);
74  BMSfreeBlockMemory(blkmem, &banditdata);
75 
76  SCIPbanditSetData(bandit, NULL);
77 
78  return SCIP_OKAY;
79 }
80 
81 /** selection callback for bandit algorithm */
82 SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
83 { /*lint --e{715}*/
84  SCIP_BANDITDATA* banditdata;
85  SCIP_Real randnr;
86  SCIP_Real curreps;
87  SCIP_RANDNUMGEN* rng;
88  int nactions;
89  assert(bandit != NULL);
90  assert(selection != NULL);
91 
92  banditdata = SCIPbanditGetData(bandit);
93  assert(banditdata != NULL);
94  rng = SCIPbanditGetRandnumgen(bandit);
95  assert(rng != NULL);
96 
97  nactions = SCIPbanditGetNActions(bandit);
98 
99  /* roll the dice to check if the best element should be picked, or an element at random */
100  randnr = SCIPrandomGetReal(rng, 0.0, 1.0);
101 
102  /* make epsilon decrease with an increasing number of selections */
103  banditdata->nselections++;
104  curreps = banditdata->eps * sqrt((SCIP_Real)nactions/(SCIP_Real)banditdata->nselections);
105 
106  /* select the best action seen so far */
107  if( randnr >= curreps )
108  {
109  SCIP_Real* weights = banditdata->weights;
110  SCIP_Real* priorities = banditdata->priorities;
111  int j;
112  SCIP_Real maxweight;
113 
114  assert(weights != NULL);
115  assert(priorities != NULL);
116 
117  /* pick the element with the largest reward */
118  maxweight = weights[0];
119  *selection = 0;
120 
121  /* determine reward for every element */
122  for( j = 1; j < nactions; ++j )
123  {
124  SCIP_Real weight = weights[j];
125 
126  /* select the action that maximizes the reward, breaking ties by action priorities */
127  if( maxweight < weight
128  || (weight >= maxweight - EPSGREEDY_SMALL && priorities[j] > priorities[*selection] ) )
129  {
130  *selection = j;
131  maxweight = weight;
132  }
133  }
134  }
135  else
136  {
137  /* play one of the actions at random */
138  *selection = SCIPrandomGetInt(rng, 0, nactions - 1);
139  }
140 
141  return SCIP_OKAY;
142 }
143 
144 /** update callback for bandit algorithm */
145 SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
146 { /*lint --e{715}*/
147  SCIP_BANDITDATA* banditdata;
148 
149  assert(bandit != NULL);
150 
151  banditdata = SCIPbanditGetData(bandit);
152  assert(banditdata != NULL);
153 
154  /* increase the selection count */
155  ++banditdata->sels[selection];
156 
157  /* the very first observation is directly stored as weight for both average or exponential decay */
158  if( banditdata->sels[selection] == 1 )
159  banditdata->weights[selection] = score;
160  else
161  {
162  /* use exponentially decreasing weights for older observations */
163  if( banditdata->preferrecent && banditdata->sels[selection] > banditdata->avglim )
164  {
165  /* decrease old weights by decay factor */
166  banditdata->weights[selection] *= banditdata->decayfactor;
167  banditdata->weights[selection] += (1.0 - banditdata->decayfactor) * score;
168  }
169  else
170  {
171  /* update average score */
172  SCIP_Real diff = score - banditdata->weights[selection];
173  banditdata->weights[selection] += diff / (SCIP_Real)(banditdata->sels[selection]);
174  }
175  }
176 
177  return SCIP_OKAY;
178 }
179 
180 /** reset callback for bandit algorithm */
181 SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
182 { /*lint --e{715}*/
183  SCIP_BANDITDATA* banditdata;
184  SCIP_Real* weights;
185  int w;
186  int nactions;
187  SCIP_RANDNUMGEN* rng;
188 
189  assert(bandit != NULL);
190 
191  banditdata = SCIPbanditGetData(bandit);
192  assert(banditdata != NULL);
193 
194  weights = banditdata->weights;
195  nactions = SCIPbanditGetNActions(bandit);
196  assert(weights != NULL);
197  assert(banditdata->priorities != NULL);
198  assert(nactions > 0);
199 
200  rng = SCIPbanditGetRandnumgen(bandit);
201  assert(rng != NULL);
202 
203  /* alter priorities slightly to make them unique */
204  if( priorities != NULL )
205  {
206  for( w = 1; w < nactions; ++w )
207  {
208  assert(priorities[w] >= 0);
209  banditdata->priorities[w] = priorities[w] + SCIPrandomGetReal(rng, -EPSGREEDY_SMALL, EPSGREEDY_SMALL);
210  }
211  }
212  else
213  {
214  /* use random priorities */
215  for( w = 0; w < nactions; ++w )
216  banditdata->priorities[w] = SCIPrandomGetReal(rng, 0.0, 1.0);
217  }
218 
219  /* reset weights and selection counters to 0 */
220  BMSclearMemoryArray(weights, nactions);
221  BMSclearMemoryArray(banditdata->sels, nactions);
222 
223  banditdata->nselections = 0;
224 
225  return SCIP_OKAY;
226 }
227 
228 /*
229  * interface methods of the Epsilon Greedy bandit algorithm
230  */
231 
232 /** internal method to create and reset epsilon greedy bandit algorithm */
234  BMS_BLKMEM* blkmem, /**< block memory */
235  BMS_BUFMEM* bufmem, /**< buffer memory */
236  SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
237  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
238  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
239  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
240  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
241  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
242  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
243  * only relevant if exponential decay is enabled
244  */
245  int nactions, /**< the positive number of possible actions */
246  unsigned int initseed /**< initial random seed */
247  )
248 {
249  SCIP_BANDITDATA* banditdata;
250 
251  SCIP_ALLOC( BMSallocBlockMemory(blkmem, &banditdata) );
252  assert(banditdata != NULL);
253  assert(eps >= 0.0);
254 
255  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->weights, nactions) );
256  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->priorities, nactions) );
257  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->sels, nactions) );
258  banditdata->eps = eps;
259  banditdata->nselections = 0;
260  banditdata->preferrecent = preferrecent;
261  banditdata->decayfactor = decayfactor;
262  banditdata->avglim = avglim;
263 
264  SCIP_CALL( SCIPbanditCreate(epsgreedy, vtable, blkmem, bufmem, priorities, nactions, initseed, banditdata) );
265 
266  return SCIP_OKAY;
267 }
268 
269 /** create and resets an epsilon greedy bandit algorithm */
271  SCIP* scip, /**< SCIP data structure */
272  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
273  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
274  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
275  SCIP_Bool preferrecent, /**< should the weights be updated in an exponentially decaying way? */
276  SCIP_Real decayfactor, /**< the factor to reduce the weight of older observations if exponential decay is enabled */
277  int avglim, /**< nonnegative limit on observation number before the exponential decay starts,
278  * only relevant if exponential decay is enabled
279  */
280  int nactions, /**< the positive number of possible actions */
281  unsigned int initseed /**< initial seed for random number generation */
282  )
283 {
284  SCIP_BANDITVTABLE* vtable;
285  assert(scip != NULL);
286  assert(epsgreedy != NULL);
287 
288  vtable = SCIPfindBanditvtable(scip, BANDIT_NAME);
289  if( vtable == NULL )
290  {
291  SCIPerrorMessage("Could not find virtual function table for %s bandit algorithm\n", BANDIT_NAME);
292  return SCIP_INVALIDDATA;
293  }
294 
295  SCIP_CALL( SCIPbanditCreateEpsgreedy(SCIPblkmem(scip), SCIPbuffer(scip), vtable, epsgreedy,
296  priorities, eps, preferrecent, decayfactor, avglim, nactions, SCIPinitializeRandomSeed(scip, (int)(initseed % INT_MAX))) );
297 
298  return SCIP_OKAY;
299 }
300 
301 /** get weights array of epsilon greedy bandit algorithm */
303  SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
304  )
305 {
306  SCIP_BANDITDATA* banditdata;
307  assert(epsgreedy != NULL);
308  banditdata = SCIPbanditGetData(epsgreedy);
309  assert(banditdata != NULL);
310 
311  return banditdata->weights;
312 }
313 
314 /** set epsilon parameter of epsilon greedy bandit algorithm */
316  SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
317  SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
318  )
319 {
320  SCIP_BANDITDATA* banditdata;
321  assert(epsgreedy != NULL);
322  assert(eps >= 0);
323 
324  banditdata = SCIPbanditGetData(epsgreedy);
325 
326  banditdata->eps = eps;
327 }
328 
329 
330 /** creates the epsilon greedy bandit algorithm includes it in SCIP */
332  SCIP* scip /**< SCIP data structure */
333  )
334 {
335  SCIP_BANDITVTABLE* banditvtable;
336 
337  SCIP_CALL( SCIPincludeBanditvtable(scip, &banditvtable, BANDIT_NAME,
338  SCIPbanditFreeEpsgreedy, SCIPbanditSelectEpsgreedy, SCIPbanditUpdateEpsgreedy, SCIPbanditResetEpsgreedy) );
339 
340  return SCIP_OKAY;
341 }
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define NULL
Definition: def.h:253
public methods for memory management
internal methods for bandit algorithms
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
Definition: misc.c:9640
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:53
#define BANDIT_NAME
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
Definition: bandit.c:180
real eps
SCIP_VAR * w
Definition: circlepacking.c:58
unsigned int SCIPinitializeRandomSeed(SCIP *scip, unsigned int initialseedvalue)
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
#define SCIPerrorMessage
Definition: pub_message.h:45
SCIPInterval sqrt(const SCIPInterval &x)
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
Definition: scip_mem.c:47
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
Definition: scip_bandit.c:70
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, SCIP_Bool preferrecent, SCIP_Real decayfactor, int avglim, int nactions, unsigned int initseed)
#define SCIP_CALL(x)
Definition: def.h:365
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
Definition: bandit.c:283
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:190
#define BMSfreeBlockMemory(mem, ptr)
Definition: memory.h:454
public data structures and miscellaneous methods
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
Definition: misc.c:9618
#define SCIP_Bool
Definition: def.h:70
#define BMSallocBlockMemoryArray(mem, ptr, num)
Definition: memory.h:443
#define BMSfreeBlockMemoryArray(mem, ptr, num)
Definition: memory.h:456
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
public methods for bandit algorithms
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
Definition: scip_mem.c:62
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
Definition: bandit.c:293
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
Definition: scip_bandit.c:38
public methods for bandit algorithms
struct SCIP_BanditData SCIP_BANDITDATA
Definition: type_bandit.h:47
public methods for random numbers
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
#define EPSGREEDY_SMALL
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
public methods for message output
#define SCIP_Real
Definition: def.h:164
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
internal methods for epsilon greedy bandit selection
#define BMSallocBlockMemory(mem, ptr)
Definition: memory.h:441
#define BMSclearMemoryArray(ptr, num)
Definition: memory.h:120
struct BMS_BlkMem BMS_BLKMEM
Definition: memory.h:427
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
#define SCIP_ALLOC(x)
Definition: def.h:376
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:32