Scippy

SCIP

Solving Constraint Integer Programs

pub_bandit_exp3.h
Go to the documentation of this file.
1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2 /* */
3 /* This file is part of the program and library */
4 /* SCIP --- Solving Constraint Integer Programs */
5 /* */
6 /* Copyright (C) 2002-2018 Konrad-Zuse-Zentrum */
7 /* fuer Informationstechnik Berlin */
8 /* */
9 /* SCIP is distributed under the terms of the ZIB Academic License. */
10 /* */
11 /* You should have received a copy of the ZIB Academic License */
12 /* along with SCIP; see the file COPYING. If not email to scip@zib.de. */
13 /* */
14 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
15 
16 /**@file pub_bandit_exp3.h
17  * @ingroup PublicBanditMethods
18  * @brief public methods for Exp.3
19  * @author Gregor Hendel
20  */
21 
22 /*---+----1----+----2----+----3----+----4----+----5----+----6----+----7----+----8----+----9----+----0----+----1----+----2*/
23 
24 #ifndef SRC_SCIP_PUB_BANDIT_EXP3_H_
25 #define SRC_SCIP_PUB_BANDIT_EXP3_H_
26 
27 #include "scip/scip.h"
28 
29 #ifdef __cplusplus
30 extern "C" {
31 #endif
32 
33 /**@addtogroup PublicBanditMethods
34  *
35  * ## Exp.3
36  *
37  * Exp.3 is a randomized selection method for the multi-armed bandit problem.
38  *
39  * Exp.3 maintains a probability distribution
40  * according to which an action is drawn
41  * in every iteration.
42  * The probability distribution is a mixture between
43  * a uniform distribution and a softmax distribution
44  * based on the cumulative rewards of the actions.
45  * The weight of the uniform distribution in the mixture
46  * is controlled by the parameter \f$ \gamma \f$, i.e.,
47  * setting \f$ \gamma = 1\f$ uses a uniform distribution
48  * in every selection step.
49  * The cumulative reward for the actions can be
50  * fine-tuned by adding a general bias for all actions.
51  * The bias is given by the parameter \f$ \beta \f$.
52  *
53  * @{
54  */
55 
56 /** creates and resets an Exp.3 bandit algorithm using \p scip pointer */
57 extern
58 SCIP_RETCODE SCIPcreateBanditExp3(
59  SCIP* scip, /**< SCIP data structure */
60  SCIP_BANDIT** exp3, /**< pointer to store bandit algorithm */
61  SCIP_Real* priorities, /**< nonnegative priorities for each action, or NULL if not needed */
62  SCIP_Real gammaparam, /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
63  SCIP_Real beta, /**< gain offset between 0 and 1 at every observation */
64  int nactions, /**< the positive number of actions for this bandit algorithm */
65  unsigned int initseed /**< initial seed for random number generation */
66  );
67 
68 /** sets the gamma parameter of the Exp.3 bandit algorithm, i.e., the weight of the uniform distribution in the mixture */
69 extern
70 void SCIPsetGammaExp3(
71  SCIP_BANDIT* exp3, /**< bandit algorithm */
72  SCIP_Real gammaparam /**< weight between uniform (gamma ~ 1) and weight driven (gamma ~ 0) probability distribution */
73  );
74 
75 /** sets the beta parameter of the Exp.3 bandit algorithm, i.e., the gain offset for actions that were not played */
76 extern
77 void SCIPsetBetaExp3(
78  SCIP_BANDIT* exp3, /**< bandit algorithm */
79  SCIP_Real beta /**< gain offset (bias) between 0 and 1 at every observation */
80  );
81 
82 /** returns probability to play an action */
83 extern
84 SCIP_Real SCIPgetProbabilityExp3(
85  SCIP_BANDIT* exp3, /**< bandit algorithm */
86  int action /**< index of the requested action */
87  );
88 
89 /** @}*/
90 
91 #ifdef __cplusplus
92 }
93 #endif
94 
95 #endif
SCIP_RETCODE SCIPcreateBanditExp3(SCIP *scip, SCIP_BANDIT **exp3, SCIP_Real *priorities, SCIP_Real gammaparam, SCIP_Real beta, int nactions, unsigned int initseed)
Definition: bandit_exp3.c:299
void SCIPsetBetaExp3(SCIP_BANDIT *exp3, SCIP_Real beta)
Definition: bandit_exp3.c:338
enum SCIP_Retcode SCIP_RETCODE
Definition: type_retcode.h:53
SCIP_Real SCIPgetProbabilityExp3(SCIP_BANDIT *exp3, int action)
Definition: bandit_exp3.c:351
void SCIPsetGammaExp3(SCIP_BANDIT *exp3, SCIP_Real gammaparam)
Definition: bandit_exp3.c:325
#define SCIP_Real
Definition: def.h:149
SCIP callable library.