Scippy

SCIP

Solving Constraint Integer Programs

bandit_epsgreedy.c
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2018 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file bandit_epsgreedy.c
17  * @brief implementation of epsilon greedy bandit algorithm
18  * @author Gregor Hendel
19  */
20 
21 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
22 
23 #include <assert.h>
24 #include "blockmemshell/memory.h"
25 #include "scip/bandit_epsgreedy.h"
26 #include "scip/scip.h"
27 
28 #define BANDIT_NAME "eps-greedy"
29 #define DEFAULT_WEIGHT 0.2
30 /*
31  * Data structures
32  */
33 
34 /** private data structure of epsilon greedy bandit algorithm */
35 struct SCIP_BanditData
36 {
37  SCIP_Real eps; /**< epsilon parameter (between 0 and 1) to control epsilon greedy */
38  SCIP_Real* weights; /**< weights for every action */
39  int nselections; /**< counter for the number of selection calls */
40 };
41 
42 /*
43  * Callback methods of bandit algorithm virtual function table
44  */
45 
46 /** callback to free bandit specific data structures */
47 SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
48 { /*lint --e{715}*/
49 
50  SCIP_BANDITDATA* banditdata;
51 
52  assert(bandit != NULL);
53 
54  banditdata = SCIPbanditGetData(bandit);
55  assert(banditdata != NULL);
56  assert(banditdata->weights != NULL);
57 
58  BMSfreeBlockMemoryArray(blkmem, &banditdata->weights, SCIPbanditGetNActions(bandit));
59  BMSfreeBlockMemory(blkmem, &banditdata);
60 
61  SCIPbanditSetData(bandit, NULL);
62 
63  return SCIP_OKAY;
64 }
65 
66 /** selection callback for bandit algorithm */
67 SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
68 { /*lint --e{715}*/
69 
70  SCIP_BANDITDATA* banditdata;
71  SCIP_Real randnr;
72  SCIP_Real curreps;
73  SCIP_RANDNUMGEN* rng;
74  int nactions;
75  assert(bandit != NULL);
76  assert(selection != NULL);
77 
78  banditdata = SCIPbanditGetData(bandit);
79  assert(banditdata != NULL);
80  rng = SCIPbanditGetRandnumgen(bandit);
81  assert(rng != NULL);
82 
83  nactions = SCIPbanditGetNActions(bandit);
84 
85  /* roll the dice to check if the best element should be picked, or an element at random */
86  randnr = SCIPrandomGetReal(rng, 0.0, 1.0);
87 
88  /* make epsilon decrease with an increasing number of selections */
89  banditdata->nselections++;
90  curreps = banditdata->eps * sqrt((SCIP_Real)nactions/(SCIP_Real)banditdata->nselections);
91 
92  /* select the best action seen so far */
93  if( randnr >= curreps )
94  {
95  SCIP_Real* weights = banditdata->weights;
96  int j;
97  SCIP_Real maxreward;
98 
99  assert(weights != NULL);
100 
101  /* pick the element with the largest reward */
102  maxreward = weights[0];
103  *selection = 0;
104 
105  /* determine reward for every element */
106  for( j = 1; j < nactions; ++j )
107  {
108  SCIP_Real reward = weights[j];
109 
110  if( maxreward < reward )
111  {
112  *selection = j;
113  maxreward = reward;
114  }
115  }
116  }
117  else
118  {
119  /* play one of the actions at random */
120  *selection = SCIPrandomGetInt(rng, 0, nactions - 1);
121  }
122 
123  return SCIP_OKAY;
124 }
125 
126 /** update callback for bandit algorithm */
127 SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
128 { /*lint --e{715}*/
129  SCIP_BANDITDATA* banditdata;
130 
131  assert(bandit != NULL);
132 
133  banditdata = SCIPbanditGetData(bandit);
134  assert(banditdata != NULL);
135 
136  /* use geometrically decreasing weights for older observations */
137  banditdata->weights[selection] *= 0.9;
138  banditdata->weights[selection] += 0.1 * score;
139 
140  return SCIP_OKAY;
141 }
142 
143 /** reset callback for bandit algorithm */
144 SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
145 { /*lint --e{715}*/
146  SCIP_BANDITDATA* banditdata;
147  SCIP_Real* weights;
148  int w;
149  int nactions;
150 
151  assert(bandit != NULL);
152 
153  banditdata = SCIPbanditGetData(bandit);
154  assert(banditdata != NULL);
155 
156  weights = banditdata->weights;
157  nactions = SCIPbanditGetNActions(bandit);
158  assert(weights != NULL);
159  assert(nactions > 0);
160 
161  /* reset weights to reflect priorities, if priorities are given */
162  if( priorities != NULL )
163  {
164  SCIP_Real priosum;
165  SCIP_Real normalization;
166 
167  /* compute priority sum for normalization */
168  priosum = priorities[0];
169  for( w = 1; w < nactions; ++w )
170  {
171  assert(priorities[w] >= 0);
172  priosum += priorities[w];
173  }
174 
175  if( priosum == 0 || priosum == nactions)
176  {
177  /* initialize all weights with the default weight */
178  for( w = 0; w < nactions; ++w )
179  weights[w] = DEFAULT_WEIGHT;
180  }
181  else
182  {
183  /* use DEFAULT_WEIGHT as the standard epsilon greedy weight for uninitialized neighborhoods */
184  normalization = DEFAULT_WEIGHT * nactions / priosum;
185 
186  /* reset weights */
187  for( w = 0; w < nactions; ++w )
188  weights[w] = priorities[w] * normalization;
189  }
190  }
191  else
192  {
193  /* reset weights */
194  for( w = 0; w < nactions; ++w )
195  weights[w] = DEFAULT_WEIGHT;
196  }
197 
198  banditdata->nselections = 0;
199 
200  return SCIP_OKAY;
201 }
202 
203 /*
204  * interface methods of the Epsilon Greedy bandit algorithm
205  */
206 
207 /** internal method to create and reset epsilon greedy bandit algorithm */
209  BMS_BLKMEM* blkmem, /**< block memory */
210  BMS_BUFMEM* bufmem, /**< buffer memory */
211  SCIP_BANDITVTABLE* vtable, /**< virtual function table with epsilon greedy callbacks */
212  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
213  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
214  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
215  int nactions, /**< the positive number of possible actions */
216  unsigned int initseed /**< initial random seed */
217  )
218 {
219  SCIP_BANDITDATA* banditdata;
220 
221  SCIP_ALLOC( BMSallocBlockMemory(blkmem, &banditdata) );
222  assert(banditdata != NULL);
223  assert(eps >= 0.0);
224 
225  SCIP_ALLOC( BMSallocBlockMemoryArray(blkmem, &banditdata->weights, nactions) );
226  banditdata->eps = eps;
227  banditdata->nselections = 0;
228 
229  SCIP_CALL( SCIPbanditCreate(epsgreedy, vtable, blkmem, bufmem, priorities, nactions, initseed, banditdata) );
230 
231  return SCIP_OKAY;
232 }
233 
234 /** create and resets an epsilon greedy bandit algorithm */
236  SCIP* scip, /**< SCIP data structure */
237  SCIP_BANDIT** epsgreedy, /**< pointer to store the epsilon greedy bandit algorithm */
238  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
239  SCIP_Real eps, /**< parameter to increase probability for exploration between all actions */
240  int nactions, /**< the positive number of possible actions */
241  unsigned int initseed /**< initial seed for random number generation */
242  )
243 {
244  SCIP_BANDITVTABLE* vtable;
245  assert(scip != NULL);
246  assert(epsgreedy != NULL);
247 
248  vtable = SCIPfindBanditvtable(scip, BANDIT_NAME);
249  if( vtable == NULL )
250  {
251  SCIPerrorMessage("Could not find virtual function table for %s bandit algorithm\n", BANDIT_NAME);
252  return SCIP_INVALIDDATA;
253  }
254 
255  SCIP_CALL( SCIPbanditCreateEpsgreedy(SCIPblkmem(scip), SCIPbuffer(scip), vtable, epsgreedy,
256  priorities, eps, nactions, SCIPinitializeRandomSeed(scip, (int)(initseed % INT_MAX))) );
257 
258  return SCIP_OKAY;
259 }
260 
261 /** get weights array of epsilon greedy bandit algorithm */
263  SCIP_BANDIT* epsgreedy /**< epsilon greedy bandit algorithm */
264  )
265 {
266  SCIP_BANDITDATA* banditdata;
267  assert(epsgreedy != NULL);
268  banditdata = SCIPbanditGetData(epsgreedy);
269  assert(banditdata != NULL);
270 
271  return banditdata->weights;
272 }
273 
274 /** set epsilon parameter of epsilon greedy bandit algorithm */
276  SCIP_BANDIT* epsgreedy, /**< epsilon greedy bandit algorithm */
277  SCIP_Real eps /**< parameter to increase probability for exploration between all actions */
278  )
279 {
280  SCIP_BANDITDATA* banditdata;
281  assert(epsgreedy != NULL);
282  assert(eps >= 0);
283 
284  banditdata = SCIPbanditGetData(epsgreedy);
285 
286  banditdata->eps = eps;
287 }
288 
289 
290 /** creates the epsilon greedy bandit algorithm includes it in SCIP */
292  SCIP* scip /**< SCIP data structure */
293  )
294 {
295  SCIP_BANDITVTABLE* banditvtable;
296 
297  SCIP_CALL( SCIPincludeBanditvtable(scip, &banditvtable, BANDIT_NAME,
298  SCIPbanditFreeEpsgreedy, SCIPbanditSelectEpsgreedy, SCIPbanditUpdateEpsgreedy, SCIPbanditResetEpsgreedy) );
299 
300  return SCIP_OKAY;
301 }
SCIP_RETCODE SCIPbanditCreateEpsgreedy(BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_BANDITVTABLE *vtable, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, int nactions, unsigned int initseed)
void SCIPsetEpsilonEpsgreedy(SCIP_BANDIT *epsgreedy, SCIP_Real eps)
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:53
#define BANDIT_NAME
SCIP_BANDITVTABLE * SCIPfindBanditvtable(SCIP *scip, const char *name)
Definition: scip_bandit.c:64
int SCIPrandomGetInt(SCIP_RANDNUMGEN *randnumgen, int minrandval, int maxrandval)
Definition: misc.c:9366
SCIP_BANDITDATA * SCIPbanditGetData(SCIP_BANDIT *bandit)
Definition: bandit.c:180
real eps
BMS_BUFMEM * SCIPbuffer(SCIP *scip)
Definition: scip.c:46746
SCIP_DECL_BANDITSELECT(SCIPbanditSelectEpsgreedy)
#define SCIPerrorMessage
Definition: pub_message.h:45
SCIPInterval sqrt(const SCIPInterval &x)
BMS_BLKMEM * SCIPblkmem(SCIP *scip)
Definition: scip.c:46731
#define SCIP_CALL(x)
Definition: def.h:350
void SCIPbanditSetData(SCIP_BANDIT *bandit, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:190
#define BMSfreeBlockMemory(mem, ptr)
Definition: memory.h:446
#define BMSallocBlockMemoryArray(mem, ptr, num)
Definition: memory.h:435
#define BMSfreeBlockMemoryArray(mem, ptr, num)
Definition: memory.h:448
SCIP_RETCODE SCIPcreateBanditEpsgreedy(SCIP *scip, SCIP_BANDIT **epsgreedy, SCIP_Real *priorities, SCIP_Real eps, int nactions, unsigned int initseed)
unsigned int SCIPinitializeRandomSeed(SCIP *scip, int initialseedvalue)
Definition: scip.c:25905
SCIP_DECL_BANDITRESET(SCIPbanditResetEpsgreedy)
SCIP_Real SCIPrandomGetReal(SCIP_RANDNUMGEN *randnumgen, SCIP_Real minrandval, SCIP_Real maxrandval)
Definition: misc.c:9388
struct SCIP_BanditData SCIP_BANDITDATA
Definition: type_bandit.h:47
SCIP_DECL_BANDITFREE(SCIPbanditFreeEpsgreedy)
SCIP_DECL_BANDITUPDATE(SCIPbanditUpdateEpsgreedy)
SCIP_RETCODE SCIPincludeBanditvtable(SCIP *scip, SCIP_BANDITVTABLE **banditvtable, const char *name, SCIP_DECL_BANDITFREE((*banditfree)), SCIP_DECL_BANDITSELECT((*banditselect)), SCIP_DECL_BANDITUPDATE((*banditupdate)), SCIP_DECL_BANDITRESET((*banditreset)))
Definition: scip_bandit.c:32
#define SCIP_Real
Definition: def.h:149
SCIP_Real * SCIPgetWeightsEpsgreedy(SCIP_BANDIT *epsgreedy)
int SCIPbanditGetNActions(SCIP_BANDIT *bandit)
Definition: bandit.c:265
SCIP_RETCODE SCIPincludeBanditvtableEpsgreedy(SCIP *scip)
internal methods for epsilon greedy bandit selection
SCIP_RANDNUMGEN * SCIPbanditGetRandnumgen(SCIP_BANDIT *bandit)
Definition: bandit.c:255
#define BMSallocBlockMemory(mem, ptr)
Definition: memory.h:433
struct BMS_BlkMem BMS_BLKMEM
Definition: memory.h:419
#define SCIP_ALLOC(x)
Definition: def.h:361
#define DEFAULT_WEIGHT
SCIP_RETCODE SCIPbanditCreate(SCIP_BANDIT **bandit, SCIP_BANDITVTABLE *banditvtable, BMS_BLKMEM *blkmem, BMS_BUFMEM *bufmem, SCIP_Real *priorities, int nactions, unsigned int initseed, SCIP_BANDITDATA *banditdata)
Definition: bandit.c:32
SCIP callable library.
memory allocation routines