@incollection{Kaisers2012a, abstract = {The number of proposed reinforcement learning algorithms appears to be ever-growing. This article tackles the diversification by showing a persistent principle in several independent reinforcement learning algorithms that have been applied to multi-agent settings. While their learning structure may look very diverse, algorithms such as Gradient Ascent, Cross learning, variations of Q-learning and Regret minimization all follow the same basic pattern. Variations of Gradient Ascent can be described by the projection dynamics and the other algorithms follow the replicator dynamics. In combination with some modulations of the learning rate and deviations for the sake of exploration, they are primarily different implementations of learning in the direction of the reinforcement gradient.}, author = {Kaisers, Michael and Tuyls, Karl}, booktitle = {Multi-Agent Systems. 9th European Workshop, EUMAS 2011}, editor = {Cossentino, Massimo and Kaisers, Michael and Tuyls, Karl and Weiss, Gerhard}, keywords = {dynamical systems,evolutionary game theory,gradient learning,reinforcement learning}, pages = {145--159}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Multi-agent Learning and the Reinforcement Gradient}}, volume = {7541}, year = {2012} }
@inproceedings{Kaisers2008b, abstract = {Auctions are pervasive in today's society and provide a variety of real markets. This article facilitates a strategic choice between a set of available trading strategies by introducing a methodology to approximate heuristic payoff tables by normal form games. An example from the auction domain is transformed by this means and an evolutionary game theory analysis is applied subsequently. The information loss in the normal form approximation is shown to be reasonably small such that the concise normal form ...}, author = {Kaisers, Michael and Tuyls, Karl and Thuijsman, Frank and Parsons, Simon}, booktitle = {Proc. of Int. Conf. on Web Intelligence and Intelligent Agent Technology (WI-IAT 2008)}, doi = {10.1109/WIIAT.2008.261}, isbn = {978-0-7695-3496-1}, month = dec, pages = {447--450}, publisher = {IEEE/WIC/ACM}, title = {{Auction Analysis by Normal Form Game Approximation}}, url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4740664}, year = {2008} }
@inproceedings{Kaisers2010FAQ, abstract = {Multi-agent learning is a crucial method to control or find solutions for systems, in which more than one entity needs to be adaptive. In today’s interconnected world, such systems are ubiquitous in many domains, including auctions in economics, swarm robotics in computer science, and politics in social sciences. Multi-agent learning is inherently more complex than single-agent learning and has a relatively thin theoretical framework supporting it. Recently, multi-agent learning dynamics have been linked to evolutionary game theory, allowing the interpretation of learning as an evolution of competing policies in the mind of the learning agents. The dynamical system from evolutionary game theory that has been linked to Q-learning predicts the expected behavior of the learning agents. Closer analysis however allows for two interesting observations: the predicted behavior is not always the same as the actual behavior, and in case of deviation, the predicted behavior is more desirable. This discrepancy is elucidated in this article, and based on these new insights Frequency Adjusted Q- (FAQ-) learning is proposed. This variation of Q-learning perfectly adheres to the predictions of the evolutionary model for an arbitrarily large part of the policy space. In addition to the theoretical discussion, experiments in the three classes of two-agent two-action games illustrate the superiority of FAQ-learning.}, author = {Kaisers, Michael and Tuyls, Karl}, booktitle = {Proc. of 9th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2010)}, editor = {van der Hoek and Kaminka and Lesp\'{e}rance and Luck and Sen}, keywords = {Evolutionary game theory,Multi-agent learning,Q-learning,Replicator dynamics}, pages = {309--315}, publisher = {International Foundation for AAMAS}, title = {{Frequency Adjusted Multi-agent Q-learning}}, year = {2010} }
@inproceedings{Kaisers2009a, abstract = {Today’s society is largely connected and many real life applications lend themselves to be modeled as multi-agent systems. Although such systems as well as their models are desirable, e.g. for reasons of stability or parallelism, they are highly complex and therefore difficult to understand or predict. Multi-agent learning has been acknowledged to be indispensable to control or find solutions for such systems. Recently, evolutionary game theory has been linked to multi-agent reinforcement learning. However, gaining insight into the dynamics of games, especially if time dependent, remains a challenging problem. This article introduces a new perspective on the reinforcement learning process described by the replicator dynamics, providing a tool to design time dependent parameters of the game or the learning process. This perspective is orthogonal to the common view of policy trajectories driven by the replicator dynamics. Rather than letting the time dimension collapse, the set of initial policies is considered to be a particle cloud that approximates a distribution and we look at the evolution of this distribution over time. First, the methodology is described, then it is applied to an example game and viable extensions are discussed.}, address = {Eindhoven}, author = {Kaisers, Michael}, booktitle = {Proc. of the 21st Benelux Conference on Artificial Intelligence (BNAIC 2009)}, editor = {Calders, Toon and Tuyls, Karl and Pechenizkiy, Mykola}, keywords = {evolutionary game theory,reinforcement learning}, pages = {113--120}, title = {{Replicator Dynamics for Multi-agent Learning - An Orthogonal Approach}}, year = {2009} }
@inproceedings{Hennes2010, author = {Hennes, Daniel and Kaisers, Michael and Tuyls, Karl}, booktitle = {Adaptive and Learning Agents (ALA 2010) Workshop}, keywords = {evolutionary game theory,multi-agent learning,reinforcement learning,replicator dynamics,stochastic games}, title = {{RESQ-learning in stochastic games}}, url = {http://michaelkaisers.com/publications/2010\_ALA\_DHennes.pdf}, year = {2010} }
@inproceedings{Bloembergen2010, abstract = {Multi-agent learning plays an increasingly important role in solving complex dynamic problems in today's society. Recently, an evolutionary game theoretic approach to multi-agent reinforcement learning has been proposed as a first step towards a more general theoretical framework. This article uses the evolutionary game theory perspective to link behavioral properties of learning algorithms to their performance in both homogeneous and heterogeneous games, thereby contributing to a better understanding of multiagent ...}, author = {Bloembergen, Daan and Kaisers, Michael and Tuyls, Karl}, booktitle = {Proc. of 22nd Belgium-Netherlands Conf.
on Artificial Intelligence (BNAIC 2010)}, pages = {11--18}, publisher = {University of Luxembourg}, title = {{A comparative study of multi-agent reinforcement learning dynamics}}, url = {http://bnaic2010.uni.lu/Papers/Category A/Bloembergen\_A.pdf}, year = {2010} }
@inproceedings{Wunder2011, abstract = {The field of multiagent decision making is extending its tools from classical game theory by embracing reinforcement learning, statistical analysis, and opponent modeling. For example, behavioral economists conclude from experimental results that people act according to levels of reasoning that form a “cognitive hierarchy” of strategies, rather than merely following the hyper-rational Nash equilibrium solution concept. This paper expands this model of the iterative reasoning process by widening the notion of a ...}, author = {Wunder, Michael and Kaisers, Michael and Yaros, John Robert and Littman, Michael}, booktitle = {Proc. of 10th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2011)}, editor = {Tumer and Yolum and Sonenberg and Stone}, keywords = {cognitive models,iterated reasoning,multiagent systems,pomdps,repeated games}, pages = {593--600}, publisher = {International Foundation for AAMAS}, title = {{Using iterated reasoning to predict opponent strategies}}, url = {http://paul.rutgers.edu/~mwunder/pub/LG\_PIPOMDP.pdf}, year = {2011} }
@inproceedings{Kaisers2011a, abstract = {This article studies Frequency Adjusted Q-learning (FAQ-learning), a variation of Q-learning that simulates simultaneous value function updates. The main contributions are empirical and theoretical support for the convergence of FAQ-learning to attractors near Nash equilibria in two-agent two-action matrix games. The games can be divided into three types: Matching pennies, Prisoners' Dilemma and Battle of Sexes. This article shows that the Matching pennies and Prisoners' Dilemma yield one attractor of the learning dynamics, ...}, author = {Kaisers, Michael and Tuyls, Karl}, booktitle = {Workshop on Interactive Decision Theory and Game Theory (IDTGT 2011)}, publisher = {Assoc. for the Advancement of Artif. Intel. (AAAI)}, title = {{FAQ-Learning in Matrix Games: Demonstrating Convergence near Nash Equilibria, and Bifurcation of Attractors in the Battle of Sexes}}, url = {http://www.aaai.org/ocs/index.php/WS/AAAIW11/paper/download/3950/4282}, year = {2011} }
@incollection{Neumann2012, author = {Neumann, Marcel and Tuyls, Karl and Kaisers, Michael}, booktitle = {Multiagent System Technologies. 10th German Conference, MATES 2012}, editor = {Timm, Ingo J. and Guttmann, Christian}, keywords = {continuous double auctions,timing,zero-intelligence}, pages = {106--115}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Using Time as a Strategic Element in Continuous Double Auctions}}, volume = {7598}, year = {2012} }
@incollection{Ammar2012, author = {Ammar, Haitham Bou and Tuyls, Karl and Kaisers, Michael}, booktitle = {Multiagent System Technologies. 10th German Conference, MATES 2012}, editor = {Timm, Ingo J. and Guttmann, Christian}, pages = {40--52}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Evolutionary Dynamics of Ant Colony Optimization}}, volume = {7598}, year = {2012} }
@inproceedings{Bloembergen2010a, abstract = {Overcoming convergence to suboptimal solutions in cooperative multi-agent games has been a main challenge in reinforcement learning. The concept of “leniency” has been proposed to be more forgiving for initial mis-coordination.
It has been shown theoretically that an arbitrarily high certainty of convergence to the global optimum can be achieved by increasing the degree of leniency, but the relation of the evolutionary game theoretic model to the Lenient Q-learning algorithm relied on the simplifying assumption that all actions ...}, author = {Bloembergen, Daan and Kaisers, Michael and Tuyls, Karl}, booktitle = {Proc. of 22nd Belgium-Netherlands Conf. on Artificial Intelligence (BNAIC 2010)}, pages = {19--26}, publisher = {University of Luxembourg}, title = {{Lenient frequency adjusted Q-learning}}, url = {http://michaelkaisers.com/publications/2010\_BNAIC\_Bloembergen\_LFAQ.pdf}, year = {2010} }
@inproceedings{Kaisers2012, author = {Kaisers, Michael and Bloembergen, Daan and Tuyls, Karl}, booktitle = {Proc. of 11th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2012)}, editor = {Conitzer and Winikoff and Padgham and van der Hoek}, keywords = {dynamical systems,evolutionary game theory,gradient learning,multi-agent learning}, pages = {1393--1394}, publisher = {International Foundation for AAMAS}, title = {{A Common Gradient in Multi-agent Reinforcement Learning (Extended Abstract)}}, year = {2012} }
@inproceedings{Mescheder2011, author = {Mescheder, Daniel and Tuyls, Karl and Kaisers, Michael}, booktitle = {Proc. of 23rd Belgium-Netherlands Conf. on Artificial Intelligence (BNAIC 2011)}, pages = {152--159}, publisher = {KAHO Sint-Lieven, Gent}, title = {{Opponent Modeling with POMDPs}}, year = {2011} }
@inproceedings{Hennes2012, author = {Hennes, Daniel and Bloembergen, Daan and Kaisers, Michael and Tuyls, Karl and Parsons, Simon}, booktitle = {Proc. of the Genetic and Evolutionary Computation Conference (GECCO)}, isbn = {9781450311779}, keywords = {evolutionary game theory,stock markets,value of information}, pages = {943--949}, title = {{Evolutionary Advantage of Foresight in Markets}}, year = {2012} }
@inproceedings{Bloembergen2011, author = {Bloembergen, Daan and Kaisers, Michael and Tuyls, Karl}, booktitle = {Proc. of 10th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2011)}, editor = {Tumer and Yolum and Sonenberg and Stone}, keywords = {evolutionary game theory,lenient learning,multi-agent learning,q-learning,replicator dynamics}, pages = {1105--1106}, publisher = {International Foundation for AAMAS}, title = {{Empirical and Theoretical Support for Lenient Learning (Extended Abstract)}}, year = {2011} }
@inproceedings{Wunder2010, abstract = {One of the challenges of multiagent decision making is that the behavior needed to maximize utility can depend on what other agents choose to do: sometimes there is no “right” answer in the absence of knowledge of how opponents will act. The Nash equilibrium is a sensible choice of behavior because it represents a mutual best response. But, even when there is a unique equilibrium, other players are under no obligation to take part in it. This observation has been forcefully illustrated in the behavioral economics community ...}, author = {Wunder, Michael and Kaisers, Michael and Littman, Michael and Yaros, John Robert}, booktitle = {Workshop on Interactive Decision Theory and Game Theory (IDTGT 2010)}, publisher = {Assoc. for the Advancement of Artif. Intel.
(AAAI)}, title = {{A Cognitive Hierarchy Model Applied to the Lemonade Game}}, year = {2010} }
@inproceedings{Kaisers2008a, abstract = {Auctions are pervasive in today's society. They provide a variety of markets, ranging from consumer-to-consumer online auctions to government-to-business auctions for telecommunications spectrum licenses. Starting from a set of trading strategies, this article enables a strategic choice by introducing the use of linear programming as a methodology to approximate heuristic payoff tables by normal form games. This method is evaluated on data from auction simulation by applying an evolutionary game theory analysis. The ...}, author = {Kaisers, Michael and Tuyls, Karl and Thuijsman, Frank}, booktitle = {Proc. of 20th Belgian-Netherlands Conference on Artificial Intelligence (BNAIC 2008)}, keywords = {auction theory,evolutionary game theory,multi-agent games}, pages = {113--120}, publisher = {University of Twente}, title = {{Discovering the game in auctions}}, url = {http://www.sci.brooklyn.cuny.edu/~parsons/projects/mech-design/publications/bnaic08.pdf}, year = {2008} }
@article{Ponsen2009, abstract = {In this paper we investigate the evolutionary dynamics of strategic behavior in the game of poker by means of data gathered from a large number of real world poker games. We perform this study from an evolutionary game theoretic perspective using two Replicator Dynamics models. First we consider the basic selection model on this data, secondly we use a model which includes both selection and mutation. We investigate the dynamic properties by studying how rational players switch between different strategies under different ...}, author = {Ponsen, Marc and Tuyls, Karl and Kaisers, Michael and Ramon, Jan}, doi = {10.1016/j.entcom.2009.09.002}, issn = {1875-9521}, journal = {Entertainment Computing}, keywords = {no limit texas hold'em poker}, month = jan, number = {1}, pages = {39--45}, publisher = {Elsevier}, title = {{An evolutionary game-theoretic analysis of poker strategies}}, url = {http://linkinghub.elsevier.com/retrieve/pii/S1875952109000056}, volume = {1}, year = {2009} }
@incollection{Kaisers2010Orthogonal, abstract = {Today’s society is largely connected and many real life applications lend themselves to be modeled as multi-agent systems. Although such systems as well as their models are desirable, e.g., for reasons of stability or parallelism, they are highly complex and therefore difficult to understand or predict. Multi-agent learning has been acknowledged to be indispensable to control or find solutions for such systems. Recently, evolutionary game theory has been linked to multi-agent reinforcement learning. However, gaining insight into the dynamics of games, especially if time dependent, remains a challenging problem. This article introduces a new perspective on the reinforcement learning process described by the replicator dynamics, providing a tool to design time dependent parameters of the game or the learning process. This perspective is orthogonal to the common view of policy trajectories driven by the replicator dynamics. Rather than letting the time dimension collapse, the set of initial policies is considered to be a particle cloud that approximates a distribution and we look at the evolution of this distribution over time. First, the methodology is described, then it is applied to an example game and viable extensions are discussed.}, author = {Kaisers, Michael and Tuyls, Karl}, booktitle = {Adaptive and Learning Agents.
Second Workshop, ALA 2009}, editor = {Taylor, Matthew E. and Tuyls, Karl}, keywords = {evolutionary game theory,reinforcement learning}, pages = {49--59}, publisher = {Springer}, series = {Lecture Notes in Computer Science}, title = {{Replicator Dynamics for Multi-agent Learning - An Orthogonal Approach}}, url = {http://www.springerlink.com/index/77G519902M773965.pdf}, volume = {5924}, year = {2010} }
@inproceedings{Alers2011, abstract = {We show the emergence of Swarm Intelligence in physical robots. We transfer an optimization algorithm which is based on bee foraging behavior to a robotic swarm. In simulation this algorithm has already been shown to be more effective, scalable and adaptive than algorithms inspired by ant foraging. In addition to this advantage, bee-inspired foraging does not require (de-)centralized simulation of environmental parameters (e.g. pheromones).}, author = {Alers, Sjriek and Bloembergen, Daan and Hennes, Daniel and de Jong, Steven and Kaisers, Michael and Lemmens, Nyree and Tuyls, Karl and Weiss, Gerhard}, booktitle = {Proc. of 10th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2011)}, editor = {Tumer and Yolum and Sonenberg and Stone}, keywords = {foraging,swarm intelligence,swarm robotics}, pages = {1311--1312}, publisher = {International Foundation for AAMAS}, title = {{Bee-inspired foraging in an embodied swarm (Demonstration)}}, url = {http://sjriek.nl/wp-content/papercite-data/pdf/alers2011bee.pdf}, year = {2011} }
@inproceedings{Wunder2012, author = {Wunder, Michael and Kaisers, Michael and Yaros, John Robert and Littman, Michael}, booktitle = {Proc. of 11th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2012)}, editor = {Conitzer and Winikoff and Padgham and van der Hoek}, keywords = {iterated reasoning,learning in populations,multiagent learning}, pages = {947--954}, publisher = {International Foundation for AAMAS}, title = {{A Framework for Modeling Population Strategies by Depth of Reasoning}}, year = {2012} }
@inproceedings{Kaisers2009, abstract = {Multi-agent learning is a challenging problem and has recently attracted increased attention by the research community [4, 5]. It promises control over complex multi-agent systems such that agents enact a global desired behavior while operating on local knowledge.}, author = {Kaisers, Michael and Tuyls, Karl and Parsons, Simon}, booktitle = {Proc. of 8th Int. Conf. on Autonomous Agents and Multiagent Systems (AAMAS 2009)}, editor = {Decker and Sichman and Sierra and Castelfranchi}, keywords = {auctions,evolutionary game theory,multi-agent learning,q-learning,replicator dynamics}, pages = {1255--1256}, publisher = {International Foundation for AAMAS}, title = {{An Evolutionary Model of Multi-agent Learning with a Varying Exploration Rate (Extended Abstract)}}, year = {2009} }
@incollection{VandenHerik2007, abstract = {In this paper we compare state-of-the-art multi-agent reinforcement learning algorithms in a wide variety of games. We consider two types of algorithms: value iteration and policy iteration. Four characteristics are studied: initial conditions, parameter settings, convergence speed, and local versus global convergence. Global convergence is still difficult to achieve in practice, despite existing theoretical guarantees. Multiple visualizations are included to provide a comprehensive insight into the learning dynamics.}, author = {van den Herik, H. Jaap
and Hennes, Daniel and Kaisers, Michael and Tuyls, Karl and Verbeeck, Katja}, booktitle = {Cooperative Information Agents XI}, pages = {36--56}, publisher = {Springer}, series = {Lecture Notes in Artificial Intelligence}, title = {{Multi-agent learning dynamics: A survey}}, url = {http://www.springerlink.com/index/dh25513561546332.pdf}, volume = {4676}, year = {2007} }