% Zaharia et al., "Delay Scheduling" (EuroSys 2010). The month is given as the standard three-letter macro; the year is stored only in the year field.
@inproceedings{308,
  author    = {Zaharia, Matei and Borthakur, Dhruba and Sen Sarma, Joydeep and Elmeleegy, Khaled and Shenker, Scott and Stoica, Ion},
  title     = {Delay Scheduling: A Simple Technique for Achieving Locality and Fairness in Cluster Scheduling},
  booktitle = {EuroSys},
  year      = {2010},
  month     = apr,
  address   = {Paris, France},
  abstract  = {As organizations start to use data-intensive cluster computing systems like Hadoop and Dryad for more applications, there is a growing need to share clusters between users. However, there is a conflict between fairness in scheduling and data locality (placing tasks on nodes that contain their input data). We illustrate this problem through our experience designing a fair scheduler for a 600-node Hadoop cluster at Facebook. To address the conflict between locality and fairness, we propose a simple algorithm called delay scheduling: when the job that should be scheduled next according to fairness cannot launch a local task, it waits for a small amount of time, letting other jobs launch tasks instead. We find that delay scheduling achieves nearly optimal data locality in a variety of workloads and can increase throughput by up to 2x while preserving fairness. In addition, the simplicity of delay scheduling makes it applicable under a wide variety of scheduling policies beyond fair sharing.},
}