@article{li2021prefix,
author = {Xiang Lisa Li and Percy Liang},
journal = {arXiv},
title = {Prefix-Tuning: Optimizing Continuous Prompts for Generation},
year = {2021},
}
@inproceedings{jones2021selective,
author = {Erik Jones and Shiori Sagawa and Pang Wei Koh and Ananya Kumar and Percy Liang},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Selective Classification Can Magnify Disparities Across Groups},
year = {2021},
}
@inproceedings{xie2021innout,
author = {Sang Michael Xie and Ananya Kumar and Robert Jones and Fereshte Khani and Tengyu Ma and Percy Liang},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {In-{N}-Out: Pre-Training and Self-Training using Auxiliary Information for Out-of-Distribution Robustness},
year = {2021},
}
@inproceedings{khani2021removing,
author = {Fereshte Khani and Percy Liang},
booktitle = {ACM Conference on Fairness, Accountability, and Transparency (FAccT)},
title = {Removing Spurious Features can Hurt Accuracy and Affect Groups Disproportionately},
year = {2021},
}
@inproceedings{gu2021beyond,
author = {Yu Gu and Sue Kase and Michelle T. Vanni and Brian M. Sadler and Percy Liang and Xifeng Yan and Yu Su},
booktitle = {World Wide Web (WWW)},
title = {Beyond I.I.D.: Three Levels of Generalization for Question Answering on Knowledge Bases},
year = {2021},
}
WILDS: a benchmark of in-the-wild distribution shifts.
Pang Wei Koh*, Shiori Sagawa*, Henrik Marklund, Sang Michael Xie, Marvin Zhang, Akshay Balsubramani, Weihua Hu, Michihiro Yasunaga, Richard Lanas Phillips, Sara Beery, Jure Leskovec, Anshul Kundaje, Emma Pierson, Sergey Levine, Chelsea Finn, Percy Liang.
arXiv, 2020.
@article{koh2020wilds,
author = {Pang Wei Koh and Shiori Sagawa and Henrik Marklund and Sang Michael Xie and Marvin Zhang and Akshay Balsubramani and Weihua Hu and Michihiro Yasunaga and Richard Lanas Phillips and Sara Beery and Jure Leskovec and Anshul Kundaje and Emma Pierson and Sergey Levine and Chelsea Finn and Percy Liang},
journal = {arXiv},
title = {{WILDS}: A Benchmark of in-the-Wild Distribution Shifts},
year = {2020},
}
@article{xie2020outputs,
author = {Sang Michael Xie and Tengyu Ma and Percy Liang},
journal = {arXiv},
title = {Simplifying Models with Unlabeled Output Data},
year = {2020},
}
@inproceedings{karamcheti2020decomposition,
author = {Sidd Karamcheti and Dorsa Sadigh and Percy Liang},
booktitle = {EMNLP Workshop for Interactive and Executable Semantic Parsing (IntEx-SemPar)},
title = {Learning Adaptive Language Interfaces through Decomposition},
year = {2020},
}
@article{liu2020explore,
author = {Evan Zheran Liu and Aditi Raghunathan and Percy Liang and Chelsea Finn},
journal = {arXiv preprint arXiv:2008.02790},
title = {Explore then Execute: Adapting without Rewards via Factorized Meta-Reinforcement Learning},
year = {2020},
}
@article{liu2020learning,
author = {Evan Zheran Liu and Ramtin Keramati and Sudarshan Seshadri and Kelvin Guu and Panupong Pasupat and Emma Brunskill and Percy Liang},
journal = {arXiv preprint arXiv:2007.05896},
title = {Learning Abstract Models for Strategic Exploration and Fast Reward Transfer},
year = {2020},
}
@inproceedings{mussmann2020pairwise,
author = {Stephen Mussmann and Robin Jia and Percy Liang},
booktitle = {Findings of Empirical Methods in Natural Language Processing (Findings of EMNLP)},
title = {On the Importance of Adaptive Data Collection for Extremely Imbalanced Pairwise Tasks},
year = {2020},
}
@inproceedings{hewitt2020rnn,
author = {John Hewitt and Michael Hahn and Surya Ganguli and Percy Liang and Christopher D. Manning},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {{RNN}s can generate bounded hierarchical languages with optimal memory},
year = {2020},
}
The EOS decision and length extrapolation.
Benjamin Newman, John Hewitt, Percy Liang, Christopher D. Manning.
Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP, 2020. Outstanding paper award.
@inproceedings{newman2020eos,
author = {Benjamin Newman and John Hewitt and Percy Liang and Christopher D. Manning},
booktitle = {Proceedings of the Third BlackboxNLP Workshop on Analyzing and Interpreting Neural Networks for NLP},
title = {The {EOS} Decision and Length Extrapolation},
year = {2020},
}
@inproceedings{dathathri2020sdp,
author = {Sumanth Dathathri and Krishnamurthy Dvijotham and Alexey Kurakin and Aditi Raghunathan and Jonathan Uesato and Rudy Bunel and Shreya Shankar and Jacob Steinhardt and Ian Goodfellow and Percy Liang and Pushmeet Kohli},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Enabling certification of verification-agnostic networks via memory-efficient semidefinite programming},
year = {2020},
}
Task-Oriented dialogue as dataflow synthesis.
Semantic Machines, Jacob Andreas, John Bufe, David Burkett, Charles Chen, Josh Clausman, Jean Crawford, Kate Crim, Jordan DeLoach, Leah Dorner, Jason Eisner, Hao Fang, Alan Guo, David Hall, Kristin Hayes, Kellie Hill, Diana Ho, Wendy Iwaszuk, Smriti Jha, Dan Klein, Jayant Krishnamurthy, Theo Lanman, Percy Liang, Christopher H. Lin, Ilya Lintsbakh, Andy McGovern, Aleksandr Nisnevich, Adam Pauls, Dmitrij Petters, Brent Read, Dan Roth, Subhro Roy, Jesse Rusak, Beth Short, Div Slomin, Ben Snyder, Stephon Striplin, Yu Su, Zachary Tellman, Sam Thomson, Andrei Vorobev, Izabela Witoszko, Jason Wolfe, Abby Wray, Yuchen Zhang, Alexander Zotov.
Transactions of the Association for Computational Linguistics (TACL), 2020.
@article{semanticmachines2020dataflow,
author = {Semantic Machines and Jacob Andreas and John Bufe and David Burkett and Charles Chen and Josh Clausman and Jean Crawford and Kate Crim and Jordan DeLoach and Leah Dorner and Jason Eisner and Hao Fang and Alan Guo and David Hall and Kristin Hayes and Kellie Hill and Diana Ho and Wendy Iwaszuk and Smriti Jha and Dan Klein and Jayant Krishnamurthy and Theo Lanman and Percy Liang and Christopher H. Lin and Ilya Lintsbakh and Andy McGovern and Aleksandr Nisnevich and Adam Pauls and Dmitrij Petters and Brent Read and Dan Roth and Subhro Roy and Jesse Rusak and Beth Short and Div Slomin and Ben Snyder and Stephon Striplin and Yu Su and Zachary Tellman and Sam Thomson and Andrei Vorobev and Izabela Witoszko and Jason Wolfe and Abby Wray and Yuchen Zhang and Alexander Zotov},
journal = {Transactions of the Association for Computational Linguistics (TACL)},
title = {Task-Oriented Dialogue as Dataflow Synthesis},
volume = {8},
year = {2020},
}
@inproceedings{sagawa2020overparameterization,
author = {Shiori Sagawa and Aditi Raghunathan and Pang Wei Koh and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {An investigation of why overparameterization exacerbates spurious correlations},
year = {2020},
}
Concept bottleneck models.
Pang Wei Koh*, Thao Nguyen*, Yew Siang Tang*, Stephen Mussmann, Emma Pierson, Been Kim, Percy Liang.
International Conference on Machine Learning (ICML), 2020.
@inproceedings{koh2020bottleneck,
author = {Pang Wei Koh and Thao Nguyen and Yew Siang Tang and Stephen Mussmann and Emma Pierson and Been Kim and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Concept Bottleneck Models},
year = {2020},
}
@inproceedings{khani2020noise,
author = {Fereshte Khani and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Feature Noise Induces Loss Discrepancy Across Groups},
year = {2020},
}
@inproceedings{yasunaga2020repair,
author = {Michi Yasunaga and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Graph-based, Self-Supervised Program Repair from Diagnostic Feedback},
year = {2020},
}
@inproceedings{raghunathan2020understanding,
author = {Aditi Raghunathan and Sang Michael Xie and Fanny Yang and John C. Duchi and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Understanding and Mitigating the Tradeoff Between Robustness and Accuracy},
year = {2020},
}
@inproceedings{kumar2020gradual,
author = {Ananya Kumar and Tengyu Ma and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Understanding Self-Training for Gradual Domain Adaptation},
year = {2020},
}
@inproceedings{srivasta2020human,
author = {Megha Srivastava and Tatsunori Hashimoto and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Robustness to Spurious Correlations via Human Annotations},
year = {2020},
}
@inproceedings{jones2020roben,
author = {Erik Jones and Robin Jia and Aditi Raghunathan and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Robust Encodings: A Framework for Combating Adversarial Typos},
year = {2020},
}
@inproceedings{kamath2020squads,
author = {Amita Kamath and Robin Jia and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Selective Question Answering under Domain Shift},
year = {2020},
}
@inproceedings{mu2020shaping,
author = {Jesse Mu and Percy Liang and Noah Goodman},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Shaping Visual Representations with Language for Few-shot Classification},
year = {2020},
}
@inproceedings{murty2020expbert,
author = {Shikhar Murty and Pang Wei Koh and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {{ExpBERT}: Representation Engineering with Natural Language Explanations},
year = {2020},
}
@inproceedings{donahue2020infilling,
author = {Chris Donahue and Mina Lee and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Enabling Language Models to Fill in the Blanks},
year = {2020},
}
@inproceedings{sagawa2020group,
author = {Shiori Sagawa and Pang Wei Koh and Tatsunori B. Hashimoto and Percy Liang},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Distributionally Robust Neural Networks for Group Shifts: On the Importance of Regularization for Worst-Case Generalization},
year = {2020},
}
@inproceedings{hu2020pretraining,
author = {Weihua Hu and Bowen Liu and Joseph Gomes and Marinka Zitnik and Percy Liang and Vijay Pande and Jure Leskovec},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Strategies for Pre-training Graph Neural Networks},
year = {2020},
}
@inproceedings{coleman2020selection,
author = {Cody Coleman and Christopher Yeh and Stephen Mussmann and Baharan Mirzasoleiman and Peter Bailis and Percy Liang and Jure Leskovec and Matei Zaharia},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Selection via Proxy: Efficient Data Selection for Deep Learning},
year = {2020},
}
@inproceedings{li2020greedy,
author = {Ray Li and Percy Liang and Stephen Mussmann},
booktitle = {Symposium on Discrete Algorithms (SODA)},
title = {A Tight Analysis of Greedy Yields Subexponential Time Approximation for Uniform Decision Tree},
year = {2020},
}
@inproceedings{jia2019certified,
author = {Robin Jia and Aditi Raghunathan and Kerem Göksel and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Certified Robustness to Adversarial Word Substitutions},
year = {2019},
}
@inproceedings{oren2019drolm,
author = {Yonatan Oren and Shiori Sagawa and Tatsunori Hashimoto and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Distributionally Robust Language Modeling},
year = {2019},
}
@inproceedings{hewitt2019control,
author = {John Hewitt and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Designing and Interpreting Probes with Control Tasks},
year = {2019},
}
@inproceedings{kulal2019spoc,
author = {Sumith Kulal and Panupong Pasupat and Kartik Chandra and Mina Lee and Oded Padon and Alex Aiken and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {{SPoC}: Search-based Pseudocode to Code},
year = {2019},
}
@inproceedings{carmon2019unlabeled,
author = {Yair Carmon and Aditi Raghunathan and Ludwig Schmidt and Percy Liang and John C. Duchi},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Unlabeled Data Improves Adversarial Robustness},
year = {2019},
}
@inproceedings{kumar2019calibration,
author = {Ananya Kumar and Percy Liang and Tengyu Ma},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Verified Uncertainty Calibration},
year = {2019},
}
@inproceedings{koh2019influence,
author = {Pang Wei Koh and Kai-Siang Ang and Hubert H. K. Teo and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {On the Accuracy of Influence Functions for Measuring Group Effects},
year = {2019},
}
@inproceedings{lee2019autocomplete,
author = {Mina Lee and Tatsunori Hashimoto and Percy Liang},
booktitle = {Emergent Communication Workshop at Neural Information Processing Systems (NeurIPS)},
title = {Learning Autocomplete Systems as a Communication Game},
year = {2019},
}
@article{raghunathan2019hurt,
author = {Aditi Raghunathan and Sang Michael Xie and Fanny Yang and John C. Duchi and Percy Liang},
journal = {arXiv preprint arXiv:1906.06032},
title = {Adversarial Training Can Hurt Generalization},
year = {2019},
}
@article{monajemi2019painless,
author = {Hatef Monajemi and Riccardo Murri and Eric Jonas and Percy Liang and Victoria Stodden and David L. Donoho},
journal = {Harvard Data Science Review},
title = {Ambitious Data Science Can Be Painless},
volume = {1},
year = {2019},
}
@inproceedings{hashimoto2019huse,
author = {Tatsunori Hashimoto and Hugh Zhang and Percy Liang},
booktitle = {North American Association for Computational Linguistics (NAACL)},
title = {Unifying Human and Statistical Evaluation for Natural Language Generation},
year = {2019},
}
@inproceedings{peng2019pun,
author = {Nanyun Peng and He He and Percy Liang},
booktitle = {North American Association for Computational Linguistics (NAACL)},
title = {Pun Generation with Surprise},
year = {2019},
}
@article{koh2019stronger,
author = {Pang Wei Koh and Jacob Steinhardt and Percy Liang},
journal = {arXiv preprint arXiv:1811.00741},
title = {Stronger Data Poisoning Attacks Break Data Sanitization Defenses},
year = {2019},
}
@inproceedings{selsam2019sat,
author = {Daniel Selsam and Matthew Lamm and Benedikt Bünz and Percy Liang and Leonardo de Moura and David L. Dill},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Learning a {SAT} Solver from Single-Bit Supervision},
year = {2019},
}
@inproceedings{zhang2019discretization,
author = {Yuchen Zhang and Percy Liang},
booktitle = {Artificial Intelligence and Statistics (AISTATS)},
title = {Defending against Whitebox Adversarial Attacks via Randomized Discretization},
year = {2019},
}
@inproceedings{pierson2019aging,
author = {Emma Pierson and Pang Wei Koh and Tatsunori Hashimoto and Daphne Koller and Jure Leskovec and Nick Eriksson and Percy Liang},
booktitle = {Artificial Intelligence and Statistics (AISTATS)},
title = {Inferring Multidimensional Rates of Aging from Cross-Sectional Data},
year = {2019},
}
@inproceedings{shi2019frangel,
author = {Kensen Shi and Jacob Steinhardt and Percy Liang},
booktitle = {Principles of Programming Languages (POPL)},
title = {{F}r{A}ngel: Component-Based Synthesis with Control Structures},
year = {2019},
}
@inproceedings{raghunathan2018sdp,
author = {Aditi Raghunathan and Jacob Steinhardt and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Semidefinite relaxations for certifying robustness to adversarial examples},
year = {2018},
}
@inproceedings{mussmann2018sgd,
author = {Stephen Mussmann and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Uncertainty Sampling is Preconditioned Stochastic Gradient Descent on Zero-One Loss},
year = {2018},
}
@inproceedings{hashimoto2018edit,
author = {Tatsunori Hashimoto and Kelvin Guu and Yonatan Oren and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {A Retrieve-and-Edit Framework for Predicting Structured Outputs},
year = {2018},
}
QuAC: question answering in context.
Eunsol Choi, He He, Mohit Iyyer, Mark Yatskar, Wen-tau Yih, Yejin Choi, Percy Liang, Luke Zettlemoyer.
Empirical Methods in Natural Language Processing (EMNLP), 2018.
@inproceedings{choi2018quac,
author = {Eunsol Choi and He He and Mohit Iyyer and Mark Yatskar and Wen-tau Yih and Yejin Choi and Percy Liang and Luke Zettlemoyer},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {{QuAC}: Question Answering in Context},
year = {2018},
}
@inproceedings{he2018negotiation,
author = {He He and Derek Chen and Anusha Balakrishnan and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Decoupling Strategy and Generation in Negotiation Dialogues},
year = {2018},
}
@inproceedings{pasupat2018elements,
author = {Panupong Pasupat and Tian-Shun Jiang and Evan Liu and Kelvin Guu and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Mapping Natural Language Commands to Web Elements},
year = {2018},
}
@inproceedings{lamm2018tap,
author = {Matthew Lamm and Arun Chaganty and Christopher D. Manning and Dan Jurafsky and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Textual Analogy Parsing: What's Shared and What's Compared among Analogous Facts},
year = {2018},
}
@inproceedings{mussmann2018accuracy,
author = {Stephen Mussmann and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {On the Relationship between Data Efficiency and Error in Active Learning},
year = {2018},
}
@inproceedings{hashimoto2018repeated,
author = {Tatsunori B. Hashimoto and Megha Srivastava and Hongseok Namkoong and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Fairness Without Demographics in Repeated Loss Minimization},
year = {2018},
}
@inproceedings{hancock2018babble,
author = {Braden Hancock and Paroma Varma and Stephanie Wang and Martin Bringmann and Percy Liang and Christopher Ré},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Training Classifiers with Natural Language Explanations},
year = {2018},
}
@inproceedings{chaganty2018evaluation,
author = {Arun Chaganty and Stephen Mussmann and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {The price of debiasing automatic metrics in natural language evaluation},
year = {2018},
}
@inproceedings{rajpurkar2018squadrun,
author = {Pranav Rajpurkar and Robin Jia and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Know What You Don't Know: Unanswerable Questions for {SQuAD}},
year = {2018},
}
@article{khani2018pip,
author = {Fereshte Khani and Noah D. Goodman and Percy Liang},
journal = {Transactions of the Association for Computational Linguistics (TACL)},
title = {Planning, Inference and Pragmatics in Sequential Language Games},
volume = {6},
year = {2018},
}
@article{guu2018edit,
author = {Kelvin Guu and Tatsunori B. Hashimoto and Yonatan Oren and Percy Liang},
journal = {Transactions of the Association for Computational Linguistics (TACL)},
title = {Generating Sentences by Editing Prototypes},
volume = {6},
year = {2018},
}
@inproceedings{li2018style,
author = {Juncen Li and Robin Jia and He He and Percy Liang},
booktitle = {North American Association for Computational Linguistics (NAACL)},
title = {Delete, Retrieve, Generate: A Simple Approach to Sentiment and Style Transfer},
year = {2018},
}
@inproceedings{liu2018workflow,
author = {Evan Zheran Liu and Kelvin Guu and Panupong Pasupat and Tianlin Shi and Percy Liang},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Reinforcement Learning on Web Interfaces using Workflow-Guided Exploration},
year = {2018},
}
@inproceedings{raghunathan2018certified,
author = {Aditi Raghunathan and Jacob Steinhardt and Percy Liang},
booktitle = {International Conference on Learning Representations (ICLR)},
title = {Certified defenses against adversarial examples},
year = {2018},
}
@inproceedings{bastani2018active,
author = {Osbert Bastani and Rahul Sharma and Alex Aiken and Percy Liang},
booktitle = {Programming Language Design and Implementation (PLDI)},
title = {Active Learning of Points-To Specifications},
year = {2018},
}
@inproceedings{sharan2018prediction,
author = {Vatsal Sharan and Sham Kakade and Percy Liang and Gregory Valiant},
booktitle = {Symposium on Theory of Computing (STOC)},
title = {Prediction with a Short Memory},
year = {2018},
}
@inproceedings{steinhardt2017certified,
author = {Jacob Steinhardt and Pang Wei Koh and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Certified Defenses for Data Poisoning Attacks},
year = {2017},
}
@inproceedings{hashimoto2017transformation,
author = {Tatsunori B. Hashimoto and John Duchi and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Unsupervised Transformation Learning via Convex Relaxations},
year = {2017},
}
Learning overcomplete HMMs.
Vatsal Sharan, Sham Kakade, Percy Liang, Gregory Valiant.
Advances in Neural Information Processing Systems (NeurIPS), 2017.
@inproceedings{sharan2017overcomplete,
author = {Vatsal Sharan and Sham Kakade and Percy Liang and Gregory Valiant},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Learning Overcomplete {HMM}s},
year = {2017},
}
@inproceedings{jia2017adversarial,
author = {Robin Jia and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Adversarial Examples for Evaluating Reading Comprehension Systems},
year = {2017},
}
@inproceedings{zhang2017macro,
author = {Yuchen Zhang and Panupong Pasupat and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Macro Grammars and Holistic Triggering for Efficient Semantic Parsing},
year = {2017},
}
@inproceedings{chaganty2017unbiased,
author = {Arun Chaganty and Ashwin Paranjape and Percy Liang and Chris Manning},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Importance sampling for unbiased on-demand evaluation of knowledge base population},
year = {2017},
}
@inproceedings{zhang2017convexified,
author = {Yuchen Zhang and Percy Liang and Martin J. Wainwright},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Convexified Convolutional Neural Networks},
year = {2017},
}
@inproceedings{selsam2017bugfree,
author = {Daniel Selsam and Percy Liang and David Dill},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Developing Bug-Free Machine Learning Systems With Formal Mathematics},
year = {2017},
}
@inproceedings{shi2017wob,
author = {Tianlin Shi and Andrej Karpathy and Linxi Fan and Jonathan Hernandez and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {World of Bits: An Open-Domain Platform for Web-Based Agents},
year = {2017},
}
@inproceedings{zhang2017hitting,
author = {Yuchen Zhang and Percy Liang and Moses Charikar},
booktitle = {Conference on Learning Theory (COLT)},
title = {A Hitting Time Analysis of Stochastic Gradient {L}angevin Dynamics},
year = {2017},
}
@inproceedings{bastani2017synthesizing,
author = {Osbert Bastani and Rahul Sharma and Alex Aiken and Percy Liang},
booktitle = {Programming Language Design and Implementation (PLDI)},
title = {Synthesizing Program Input Grammars},
year = {2017},
}
@inproceedings{wang2017naturalizing,
author = {Sida I. Wang and Sam Ginn and Percy Liang and Christopher D. Manning},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Naturalizing a Programming Language via Interactive Learning},
year = {2017},
}
@inproceedings{he2017symmetric,
author = {He He and Anusha Balakrishnan and Mihail Eric and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
pages = {1766--1776},
title = {Learning Symmetric Collaborative Dialogue Agents with Dynamic Knowledge Graph Embeddings},
year = {2017},
}
@inproceedings{guu2017bridging,
author = {Kelvin Guu and Panupong Pasupat and Evan Zheran Liu and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {From Language to Programs: Bridging Reinforcement Learning and Maximum Marginal Likelihood},
year = {2017},
}
@inproceedings{steinhardt2016risk,
author = {Jacob Steinhardt and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Unsupervised Risk Estimation Using Only Conditional Independence Structure},
year = {2016},
}
@inproceedings{rajpurkar2016squad,
author = {Pranav Rajpurkar and Jian Zhang and Konstantin Lopyrev and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {{SQuAD}: 100,000+ Questions for Machine Comprehension of Text},
year = {2016},
}
@inproceedings{wang2016games,
author = {Sida I. Wang and Percy Liang and Chris Manning},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Learning Language Games through Interaction},
year = {2016},
}
@inproceedings{long2016projections,
author = {Reginald Long and Panupong Pasupat and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Simpler Context-Dependent Logical Forms via Model Projections},
year = {2016},
}
@inproceedings{pasupat2016inferring,
author = {Panupong Pasupat and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Inferring Logical Forms From Denotations},
year = {2016},
}
@inproceedings{khani2016unanimity,
author = {Fereshte Khani and Martin Rinard and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Unanimous Prediction for 100% Precision with Application to Learning Semantic Mappings},
year = {2016},
}
@inproceedings{chaganty2016perspectives,
author = {Arun Tejasvi Chaganty and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {How Much is 131 Million Dollars? {P}utting Numbers in Perspective with Compositional Descriptions},
year = {2016},
}
@inproceedings{raghunathan2016linear,
author = {Aditi Raghunathan and Roy Frostig and John Duchi and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Estimation from Indirect Supervision with Linear Moments},
year = {2016},
}
@incollection{wager2016levy,
author = {Stefan Wager and Will Fithian and Percy Liang},
booktitle = {Perturbations, Optimization and Statistics},
title = {Data Augmentation via {L}évy Processes},
year = {2016},
}
@article{liang2016executable,
author = {Percy Liang},
journal = {Communications of the ACM},
title = {Learning Executable Semantic Parsers for Natural Language Understanding},
volume = {59},
year = {2016},
}
@inproceedings{wang2015overnight,
author = {Yushi Wang and Jonathan Berant and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Building a Semantic Parser Overnight},
year = {2015},
}
@article{berant2015agenda,
author = {Jonathan Berant and Percy Liang},
journal = {Transactions of the Association for Computational Linguistics (TACL)},
pages = {545--558},
title = {Imitation Learning of Agenda-Based Semantic Parsers},
volume = {3},
year = {2015},
}
@inproceedings{steinhardt2015relaxed,
author = {Jacob Steinhardt and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Learning with Relaxed Supervision},
year = {2015},
}
@inproceedings{wang2015polynomial,
author = {Sida I. Wang and Arun Chaganty and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Estimating Mixture Models via Mixture of Polynomials},
year = {2015},
}
@inproceedings{werling2015onthejob,
author = {Keenon Werling and Arun Chaganty and Percy Liang and Chris Manning},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {On-the-Job Learning with {B}ayesian Decision Theory},
year = {2015},
}
@inproceedings{kuleshov2015calibrated,
author = {Volodymyr Kuleshov and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Calibrated Structured Prediction},
year = {2015},
}
@inproceedings{guu2015traversing,
author = {Kelvin Guu and John Miller and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Traversing Knowledge Graphs in Vector Space},
year = {2015},
}
@inproceedings{misra2015environment,
author = {Dipendra K. Misra and Kejia Tao and Percy Liang and Ashutosh Saxena},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Environment-Driven Lexicon Induction for High-Level Instructions},
year = {2015},
}
@inproceedings{shi2015sample,
author = {Tianlin Shi and Jacob Steinhardt and Percy Liang},
booktitle = {Artificial Intelligence and Statistics (AISTATS)},
pages = {875--884},
title = {Learning Where To Sample in Structured Prediction},
year = {2015},
}
@inproceedings{kuleshov2015tensor,
author = {Volodymyr Kuleshov and Arun Chaganty and Percy Liang},
booktitle = {Artificial Intelligence and Statistics (AISTATS)},
title = {Tensor factorization via matrix factorization},
year = {2015},
}
@article{steinhardt2014sparse,
author = {Jacob Steinhardt and Stefan Wager and Percy Liang},
journal = {arXiv preprint arXiv:1412.4182},
title = {The Statistics of Streaming Sparse Regression},
year = {2014},
}
@inproceedings{ramanathan2014linking,
author = {Vignesh Ramanathan and Armand Joulin and Percy Liang and Li Fei-Fei},
booktitle = {European Conference on Computer Vision (ECCV)},
title = {Linking people with "their" names using coreference resolution},
year = {2014},
}
@article{liang2014talking,
author = {Percy Liang},
journal = {XRDS: Crossroads, The ACM Magazine for Students},
number = {1},
pages = {18--21},
title = {Talking to computers in natural language},
volume = {21},
year = {2014},
}
@inproceedings{berant2014paraphrasing,
author = {Jonathan Berant and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Semantic Parsing via Paraphrasing},
year = {2014},
}
@inproceedings{pasupat2014extraction,
author = {Panupong Pasupat and Percy Liang},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Zero-shot Entity Extraction from Web Pages},
year = {2014},
}
@inproceedings{chaganty2014graphical,
author = {Arun Chaganty and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Estimating Latent-Variable Graphical Models using Moments and Likelihoods},
year = {2014},
}
@inproceedings{steinhardt2014eg,
author = {Jacob Steinhardt and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Adaptivity and Optimism: An Improved Exponentiated Gradient Algorithm},
year = {2014},
}
@inproceedings{wager2014altitude,
author = {Stefan Wager and Will Fithian and Sida I. Wang and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Altitude Training: Strong Bounds for Single-Layer Dropout},
year = {2014},
}
@inproceedings{frostig2014lowrank,
author = {Roy Frostig and Sida I. Wang and Percy Liang and Chris Manning},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Simple {MAP} inference via low-rank relaxations},
year = {2014},
}
@inproceedings{wang2014iqp,
author = {Sida I. Wang and Roy Frostig and Percy Liang and Chris Manning},
booktitle = {International Conference on Learning Representations Workshop (ICLR)},
title = {Relaxations for inference in restricted {B}oltzmann machines},
year = {2014},
}
@inproceedings{berant2013freebase,
author = {Jonathan Berant and Andrew Chou and Roy Frostig and Percy Liang},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Semantic Parsing on {F}reebase from Question-Answer Pairs},
year = {2013},
}
@inproceedings{wang2013noising,
author = {Sida I. Wang and Mengqiu Wang and Stefan Wager and Percy Liang and Chris Manning},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {Feature Noising for Log-linear Structured Prediction},
year = {2013},
}
@inproceedings{wager2013dropout,
author = {Stefan Wager and Sida I. Wang and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Dropout Training as Adaptive Regularization},
year = {2013},
}
@inproceedings{chaganty13regression,
author = {Arun Chaganty and Percy Liang},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Spectral Experts for Estimating Mixtures of Linear Regressions},
year = {2013},
}
@inproceedings{ramanathan2013event,
author = {Vignesh Ramanathan and Percy Liang and Li Fei-Fei},
booktitle = {International Conference on Computer Vision (ICCV)},
title = {Video Event Understanding using Natural Language Descriptions},
year = {2013},
}
@inproceedings{sharma13algebraic,
author = {Rahul Sharma and Saurabh Gupta and Bharath Hariharan and Alex Aiken and Percy Liang and Aditya V. Nori},
booktitle = {European Symposium on Programming (ESOP)},
title = {A Data Driven Approach for Algebraic Loop Invariants},
year = {2013},
}
This paper explores unsupervised learning of parsing models along two directions. First, which models are identifiable from infinite data? We use a general technique for numerically checking identifiability based on the rank of a Jacobian matrix, and apply it to several standard constituency and dependency parsing models. Second, for identifiable models, how do we estimate the parameters efficiently? EM suffers from local optima, while recent work using spectral methods cannot be directly applied since the topology of the parse tree varies across sentences. We develop a strategy, unmixing, which deals with this additional complexity for restricted classes of parsing models.
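A minimal sketch of that numerical check, under assumed details rather than the paper's actual setup: pick a random interior parameter vector, map it to the induced distribution over observations, and compare the rank of a finite-difference Jacobian to the number of free parameters. The toy model here (a 2-state HMM with binary emissions, observed as length-3 sequences) is purely illustrative.

import itertools
import numpy as np

def hmm_sequence_probs(theta, length=3):
    """Map free parameters to the distribution over binary sequences."""
    pi1, a11, a21, b1, b2 = theta           # all assumed to lie in (0, 1)
    pi = np.array([pi1, 1 - pi1])           # initial state distribution
    A = np.array([[a11, 1 - a11],           # transition matrix
                  [a21, 1 - a21]])
    B = np.array([[b1, 1 - b1],             # emission matrix P(obs | state)
                  [b2, 1 - b2]])
    probs = []
    for obs in itertools.product([0, 1], repeat=length):
        alpha = pi * B[:, obs[0]]           # forward algorithm
        for o in obs[1:]:
            alpha = (alpha @ A) * B[:, o]
        probs.append(alpha.sum())
    return np.array(probs)

def numerical_jacobian(f, theta, eps=1e-6):
    base = f(theta)
    J = np.zeros((base.size, theta.size))
    for j in range(theta.size):
        bumped = theta.copy()
        bumped[j] += eps
        J[:, j] = (f(bumped) - base) / eps
    return J

theta = np.random.default_rng(0).uniform(0.1, 0.9, size=5)   # random generic point
J = numerical_jacobian(hmm_sequence_probs, theta)
print("Jacobian rank", np.linalg.matrix_rank(J, tol=1e-7), "vs", theta.size, "parameters")

If the rank falls short of the parameter count at generic points, the parameters cannot be locally identified from the observed distribution.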
@inproceedings{hsu12identifiability,
author = {Daniel Hsu and Sham M. Kakade and Percy Liang},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Identifiability and Unmixing of Latent Parse Trees},
year = {2012},
}
Compositional question answering begins by mapping questions to logical forms, but training a semantic parser to perform this mapping typically requires the costly annotation of the target logical forms. In this paper, we learn to map questions to answers via latent logical forms, which are induced automatically from question-answer pairs. In tackling this challenging learning problem, we introduce a new semantic representation which highlights a parallel between dependency syntax and efficient evaluation of logical forms. On two standard semantic parsing benchmarks (GEO and JOBS), our system obtains the highest published accuracies, despite using less supervision than existing systems.
Task: learn to map questions to answers via latent logical forms.
Contribution: new tree-based semantic representation.
Result: surpass state-of-the-art on semantic parsing with less supervision.
@inproceedings{liang11dcs,
author = {Percy Liang and Michael I. Jordan and Dan Klein},
booktitle = {Association for Computational Linguistics (ACL)},
pages = {590--599},
title = {Learning Dependency-Based Compositional Semantics},
year = {2011},
}
Many static analyses do not scale as they are made more precise. For example, increasing the amount of context sensitivity in a k-limited pointer analysis causes the number of contexts to grow exponentially with k. Iterative refinement techniques can mitigate this growth by starting with a coarse abstraction and only refining parts of the abstraction that are deemed relevant with respect to a given client.
In this paper, we introduce a new technique called pruning that uses client feedback in a different way. The basic idea is to use coarse abstractions to prune away parts of the program analysis deemed irrelevant for proving a client query, and then to use finer abstractions on the sliced program analysis. For a k-limited pointer analysis, this approach amounts to adaptively refining and pruning a set of prefix patterns representing the contexts relevant for the client. By pruning, we are able to scale up to much more expensive abstractions than before. We also prove that the pruned analysis is both sound and complete; that is, it yields the same results as an analysis that uses a more expensive abstraction directly without pruning.
Idea: run cheap analysis, use client feedback to prune away irrelevant parts of program analysis (think program slicing); then run expensive analysis.
Theoretical result: pruning is sound and complete.
Empirical result: we can use much richer $k$-object-sensitivity abstractions.
@inproceedings{liang11pruning,
author = {Percy Liang and Mayur Naik},
booktitle = {Programming Language Design and Implementation (PLDI)},
title = {Scaling up Abstraction Refinement via Pruning},
year = {2011},
}
Static analyses are generally parametrized by an abstraction which is chosen from a family of abstractions. We are interested in flexible families of abstractions with many parameters, as these families can allow one to increase precision in ways tailored to the client without sacrificing scalability. For example, we consider k-limited points-to analyses where each call site and allocation site in a program can have a different k value. We then ask a natural question in this paper: What is the minimal (coarsest) abstraction in a given family which is able to prove a set of queries? In addressing this question, we make the following two contributions: (i) We introduce two machine learning algorithms for efficiently finding a minimal abstraction; and (ii) for a static race detector backed by a k-limited points-to analysis, we show empirically that minimal abstractions are actually quite coarse: It suffices to provide context/object sensitivity to a very small fraction (0.4--2.3%) of the sites to yield equally precise results as providing context/object sensitivity uniformly to all sites.
Question: how small is the smallest abstraction needed to prove a query?
Empirical answer: very small (less than 2.5% sites need to be treated context-sensitively for k-limited analyses for race detection).
Found this answer using a new machine learning algorithm that exploits this sparsity.
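A minimal sketch of the coarsening idea, not the paper's two algorithms: it assumes a hypothetical oracle proves(sensitive_sites) that runs the k-limited analysis with only the given sites treated context-sensitively and reports whether the client query is proved. Random groups of sites are dropped, and a coarsening is kept whenever the query is still proved; when the necessary sites are sparse, this settles on a small set after relatively few analysis runs.

import random

def minimize_abstraction(sites, proves, fraction=0.5, rounds=200, seed=0):
    """Shrink the set of context-sensitive sites while the query stays proved.

    sites  -- all call/allocation sites (hypothetical identifiers)
    proves -- hypothetical oracle: runs the analysis, returns True/False
    """
    rng = random.Random(seed)
    sensitive = set(sites)                    # start from the finest abstraction
    assert proves(sensitive), "query not provable even with full sensitivity"
    for _ in range(rounds):
        if len(sensitive) <= 1:
            break
        k = max(1, int(fraction * len(sensitive)))
        drop = set(rng.sample(sorted(sensitive), k))
        if proves(sensitive - drop):          # coarser abstraction still suffices
            sensitive -= drop
    return sensitive                          # a small sufficient set, not provably minimal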
@inproceedings{liang11minimal,
author = {Percy Liang and Omer Tripp and Mayur Naik},
booktitle = {Principles of Programming Languages (POPL)},
title = {Learning Minimal Abstractions},
year = {2011},
}
@inproceedings{golland2010pragmatics,
author = {Dave Golland and Percy Liang and Dan Klein},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
pages = {410--419},
title = {A Game-theoretic Approach to Generating Spatial Descriptions},
year = {2010},
}
We present a simple, robust generation system which performs content selection and surface realization in a unified, domain-independent framework. In our approach, we break up the end-to-end generation process into a sequence of local decisions, arranged hierarchically and each trained discriminatively. We deployed our system in three different domains---Robocup sportscasting, technical weather forecasts, and common weather forecasts, obtaining results comparable to state-of-the-art domain-specific systems both in terms of BLEU scores and human evaluation.
Model natural language generation as a sequence of local decisions, each backed by a log-linear model.
Advantage: can use arbitrary expressive features, works across multiple domains.
@inproceedings{angeli10generation,
author = {Gabor Angeli and Percy Liang and Dan Klein},
booktitle = {Empirical Methods in Natural Language Processing (EMNLP)},
title = {A Simple Domain-Independent Probabilistic Approach to Generation},
year = {2010},
}
The quality of a static analysis of heap-manipulating programs is largely determined by its heap abstraction. Object allocation sites are a commonly-used abstraction, but are too coarse for some clients. The goal of this paper is to investigate how various refinements of allocation sites can improve precision. In particular, we consider abstractions that use call stack, object recency, and heap connectivity information. We measure the precision of these abstractions dynamically for four different clients motivated by concurrency and on nine Java programs chosen from the DaCapo benchmark suite. Our dynamic results shed new light on aspects of heap abstractions that matter for precision, which allows us to more effectively navigate the large space of possible heap abstractions.
Question: what aspects of a heap abstraction matter?
Methodology: run program (9 DaCapo benchmarks) dynamically, compute static heap abstractions (3 dimensions of refinement: context sensitivity, object recency, and shape analysis), answer client queries (4 clients based on concurrency).
@inproceedings{liang10abstraction,
author = {Percy Liang and Omer Tripp and Mayur Naik and Mooly Sagiv},
booktitle = {Object-Oriented Programming, Systems, Languages, and Applications (OOPSLA)},
title = {A Dynamic Evaluation of Static Heap Abstractions},
year = {2010},
}
We are interested in learning programs for multiple related tasks given only a few training examples per task. Since the program for a single task is underdetermined by its data, we introduce a nonparametric hierarchical Bayesian prior over programs which shares statistical strength across multiple tasks. The key challenge is to parametrize this multi-task sharing. For this, we introduce a new representation of programs based on combinatory logic and provide an MCMC algorithm that can perform safe program transformations on this representation to reveal shared inter-program substructures.
Programs are trees, subprograms are subtrees, which can be shared across tasks. Combinators refactor programs to expose the appropriate subprograms.
@inproceedings{liang10programs,
author = {Percy Liang and Michael I. Jordan and Dan Klein},
booktitle = {International Conference on Machine Learning (ICML)},
pages = {639--646},
title = {Learning Programs: A Hierarchical {B}ayesian Approach},
year = {2010},
}
A learning problem might have several measures of complexity (e.g., norm and dimensionality) that affect the generalization error. What is the interaction between these complexities? Dimension-free learning theory bounds and parametric asymptotic analyses each provide a partial picture of the full learning curve. In this paper, we use high-dimensional asymptotics on two classical problems---mean estimation and linear regression---to explore the learning curve more completely. We show that these curves exhibit multiple regimes, where in each regime, the excess risk is controlled by a subset of the problem complexities.
Goal: understand excess risk as a function of sample size and problem complexity. On simple examples, show that asymptotic risk has multiple regimes, each controlled by different complexities.
@inproceedings{liang10regimes,
author = {Percy Liang and Nati Srebro},
booktitle = {International Conference on Machine Learning (ICML)},
title = {On the Interaction between Norm and Dimensionality: Multiple Regimes in Learning},
year = {2010},
}
Most existing algorithms for learning latent-variable models---such as EM and existing Gibbs samplers---are token-based, meaning that they update the variables associated with one sentence at a time. The incremental nature of these methods makes them susceptible to local optima/slow mixing. In this paper, we introduce a type-based sampler, which updates a block of variables, identified by a type, which spans multiple sentences. We show improvements on part-of-speech induction, word segmentation, and learning tree-substitution grammars.
NLP perspective: goal is to avoid local optima by processing all tokens associated with a type at once instead of one token or sentence at a time.
Sampling perspective: new type of block sampling that exploits exchangeability.
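A toy sketch of a single type-based move, in a simplified setting of my own choosing rather than the paper's models: a collapsed two-cluster mixture of unigrams with a Beta prior on the cluster indicator and Dirichlet priors on the emissions. Tokens of one word type are exchangeable under this model, so the block of their assignments is resampled at once by sampling how many go to cluster 1 and then choosing which ones uniformly at random.

import math
import numpy as np

def log_rising(x, k):
    """log of the rising factorial x (x+1) ... (x+k-1)."""
    return sum(math.log(x + i) for i in range(k))

def resample_type_block(m, n1, n0, c1w, c0w, vocab_size, rng,
                        alpha=1.0, beta=0.5):
    """Jointly resample the cluster assignments of the m tokens of one type.

    n1, n0   -- current cluster sizes, excluding this type's tokens
    c1w, c0w -- counts of this word type in each cluster, excluding its tokens
    Returns s, the number of the m tokens assigned to cluster 1.
    """
    logp = np.empty(m + 1)
    for s in range(m + 1):
        logp[s] = (math.lgamma(m + 1) - math.lgamma(s + 1) - math.lgamma(m - s + 1)
                   + log_rising(n1 + alpha, s) + log_rising(n0 + alpha, m - s)
                   + log_rising(c1w + beta, s) - log_rising(n1 + vocab_size * beta, s)
                   + log_rising(c0w + beta, m - s) - log_rising(n0 + vocab_size * beta, m - s))
    p = np.exp(logp - logp.max())
    return rng.choice(m + 1, p=p / p.sum())

rng = np.random.default_rng(0)
print(resample_type_block(m=12, n1=40, n0=60, c1w=0, c0w=0, vocab_size=1000, rng=rng))

A token-level sampler would need many sweeps to move all twelve tokens; here the whole block moves in one step.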
@inproceedings{liang10type,
author = {Percy Liang and Michael I. Jordan and Dan Klein},
booktitle = {North American Association for Computational Linguistics (NAACL)},
title = {Type-Based {MCMC}},
year = {2010},
}
Many types of regularization schemes have been employed in statistical learning, each one motivated by some assumption about the problem domain. In this paper, we present a unified asymptotic analysis of smooth regularizers, which allows us to see how the validity of these assumptions impacts the success of a particular regularizer. In addition, our analysis motivates an algorithm for optimizing regularization parameters, which in turn can be analyzed within our framework. We apply our analysis to several examples, including hybrid generative-discriminative learning and multi-task learning.
Setting: estimator defined by minimizing loss plus regularization.
Question: what is the best regularizer to use?
This is hard to optimize, so use a Taylor expansion instead, yielding an interpretable closed-form solution.
@inproceedings{liang09regularization,
author = {Percy Liang and Francis Bach and Guillaume Bouchard and Michael I. Jordan},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Asymptotically Optimal Regularization in Smooth Parametric Models},
year = {2009},
}
Probabilistic context-free grammars (PCFGs) have played an important role in the modeling of syntax in natural language processing and other applications, but choosing the proper model complexity is often difficult. We present a nonparametric Bayesian generalization of the PCFG based on the hierarchical Dirichlet process (HDP). In our HDP-PCFG model, the effective complexity of the grammar can grow with increasing data. We describe an efficient variational inference algorithm for our model and present experiments on both a synthetic grammar induction task and a large-scale natural language parsing task.
Details of the EMNLP 2007 paper + general background, empirical intuitions, and derivations for structured mean-field + a small grammar induction experiment.
@incollection{liang09hdppcfg,
author = {Percy Liang and Michael I. Jordan and Dan Klein},
booktitle = {The Oxford Handbook of Applied Bayesian Analysis},
title = {Probabilistic grammars and hierarchical {D}irichlet processes},
year = {2009},
}
A central problem in grounded language acquisition is learning the correspondences between a rich world state and a stream of text which references that world state. To deal with the high degree of ambiguity present in this setting, we present a generative model that simultaneously segments the text into utterances and maps each utterance to a meaning representation grounded in the world state. We show that our model generalizes across three domains of increasing difficulty---Robocup sportscasting, weather forecasts (a new domain), and NFL recaps.
Stuff happens in the world. A text talks about it. Our goal: learn the correspondence between the two.
Approach: probabilistic model capturing identification of entities/events in the world, segmentation of the text, and alignment between the two.
@inproceedings{liang09semantics,
author = {Percy Liang and Michael I. Jordan and Dan Klein},
booktitle = {Association for Computational Linguistics and International Joint Conference on Natural Language Processing (ACL-IJCNLP)},
pages = {91--99},
title = {Learning Semantic Correspondences with Less Supervision},
year = {2009},
}
Given a model family and a set of unlabeled examples, one could either label specific examples or state general constraints---both provide information about the desired model. In general, what is the most cost-effective way to learn? To address this question, we introduce measurements, a general class of mechanisms for providing information about a target model. We present a Bayesian decision-theoretic framework, which allows us to both integrate diverse measurements and choose new measurements to make. We use a variational inference algorithm, which exploits exponential family duality. The merits of our approach are demonstrated on two sequence labeling tasks.
Goal: learning with minimum human effort.
Things human can do: label data, provide constraints---in general, make measurements.
Use Bayesian decision theory to choose optimal measurements.
@inproceedings{liang09measurements,
author = {Percy Liang and Michael I. Jordan and Dan Klein},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Learning from Measurements in Exponential Families},
year = {2009},
}
The (batch) EM algorithm plays an important role in unsupervised induction, but it sometimes suffers from slow convergence. In this paper, we show that online variants (1) provide significant speedups and (2) can even find better solutions than those found by batch EM. We support these findings on four unsupervised tasks: part-of-speech tagging, document classification, word segmentation, and word alignment.
What you'd expect: online is faster than batch.
What you might not expect: online gets better accuracy than batch.
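A minimal sketch of stepwise online EM in an assumed toy setting (a 1-D mixture of two Gaussians rather than the paper's NLP tasks): each example's expected sufficient statistics are folded into a running average with stepsize (k + 2) ** -alpha, and the parameters are re-estimated after every example.

import numpy as np

rng = np.random.default_rng(0)
x = np.concatenate([rng.normal(-2, 1, 500), rng.normal(3, 1, 500)])
rng.shuffle(x)

w = np.array([0.5, 0.5])                 # mixture weights
mu = np.array([-1.0, 1.0])               # component means
var = np.array([1.0, 1.0])               # component variances
s = np.stack([w, w * mu, w * (var + mu**2)])   # running sufficient statistics
alpha = 0.7

for k, xi in enumerate(x):
    # E-step on one example: responsibilities under the current parameters
    logp = np.log(w) - 0.5 * np.log(2 * np.pi * var) - (xi - mu)**2 / (2 * var)
    r = np.exp(logp - logp.max())
    r /= r.sum()
    # stepwise interpolation of the sufficient statistics
    eta = (k + 2) ** -alpha
    s = (1 - eta) * s + eta * np.stack([r, r * xi, r * xi**2])
    # M-step: read the parameters off the running statistics
    w = s[0] / s[0].sum()
    mu = s[1] / s[0]
    var = np.maximum(s[2] / s[0] - mu**2, 1e-3)

print("weights", np.round(w, 2), "means", np.round(mu, 2))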
@inproceedings{liang09online,
author = {Percy Liang and Dan Klein},
booktitle = {North American Association for Computational Linguistics (NAACL)},
pages = {611--619},
title = {Online {EM} for Unsupervised Models},
year = {2009},
}
Statistical and computational concerns have motivated parameter estimators based on various forms of likelihood, e.g., joint, conditional, and pseudolikelihood. In this paper, we present a unified framework for studying these estimators, which allows us to compare their relative (statistical) efficiencies. Our asymptotic analysis suggests that modeling more of the data tends to reduce variance, but at the cost of being more sensitive to model misspecification. We present experiments validating our analysis.
Derive general expression for the asymptotic risk of composite likelihood estimators in exponential families.
This allows us to compare the various estimators.
@inproceedings{liang08asymptotics,
author = {Percy Liang and Michael I. Jordan},
booktitle = {International Conference on Machine Learning (ICML)},
pages = {584--591},
title = {An Asymptotic Analysis of Generative, Discriminative, and Pseudolikelihood Estimators},
year = {2008},
}
Structured models often achieve excellent performance but can be slow at test time. We investigate structure compilation, where we replace structure with features, which are often computationally simpler but unfortunately statistically more complex. We analyze this tradeoff theoretically and empirically on three natural language processing tasks. We also introduce a simple method to transfer predictive power from structure to features via unlabeled data, while incurring a minimal statistical penalty.
How much do we lose by throwing out edge features in CRFs and adding node features?
Studies the approximation, estimation, computational aspects of the tradeoff.
@inproceedings{liang08structure,
author = {Percy Liang and Hal {Daum{é} III} and Dan Klein},
booktitle = {International Conference on Machine Learning (ICML)},
title = {Structure Compilation: Trading Structure for Features},
year = {2008},
}
We identify four types of errors that unsupervised induction systems make and study each one in turn. Our contributions include (1) using a meta-model to analyze the incorrect biases of a model in a systematic way, (2) providing an efficient and robust method of measuring distance between two parameter settings of a model, and (3) showing that local optima issues which typically plague EM can be somewhat alleviated by increasing the number of training examples. We conduct our analyses on three models: the HMM, the PCFG, and a simple dependency model.
Error decomposition: approximation, identifiability, estimation, optimization errors.
Used meta-model to analyze approximation error.
Empirically observed that more data reduces optimization error.
@inproceedings{liang08errors,
author = {Percy Liang and Dan Klein},
booktitle = {Human Language Technology and Association for Computational Linguistics (HLT/ACL)},
title = {Analyzing the Errors of Unsupervised Learning},
year = {2008},
}
We present a method for learning bilingual translation lexicons from monolingual corpora. Word types in each language are characterized by purely monolingual features, such as context counts and orthographic substrings. Translations are induced using a generative model based on canonical correlation analysis, which explains the monolingual lexicons in terms of latent matchings. We show that high-precision lexicons can be learned in a variety of language pairs and from a range of corpus types.
By using CCA, can induce translation lexicons without the usual sentence-aligned corpora.
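A simplified sketch of the CCA idea, not the authors' generative model: it assumes a small seed lexicon of known translation pairs (larger than the number of CCA components), fits CCA on the seed's monolingual feature vectors, projects every word type into the shared space, and proposes translations by cosine nearest neighbour. The paper instead learns the matching itself jointly with the CCA model.

import numpy as np
from sklearn.cross_decomposition import CCA
from sklearn.preprocessing import normalize

def induce_lexicon(X, Y, seed_pairs, n_components=50, top_k=1):
    """X: (n_src, d_src) monolingual features; Y: (n_tgt, d_tgt) features;
    seed_pairs: list of (src_index, tgt_index) known translations."""
    src_idx, tgt_idx = zip(*seed_pairs)
    cca = CCA(n_components=n_components, max_iter=1000)
    cca.fit(X[list(src_idx)], Y[list(tgt_idx)])
    Xc, Yc = cca.transform(X, Y)                  # shared latent space
    Xc, Yc = normalize(Xc), normalize(Yc)         # unit norm -> cosine similarity
    sims = Xc @ Yc.T
    return np.argsort(-sims, axis=1)[:, :top_k]   # best target word(s) per source word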
@inproceedings{haghighi08lexicon,
author = {Aria Haghighi and Percy Liang and Taylor Berg-Kirkpatrick and Dan Klein},
booktitle = {Human Language Technology and Association for Computational Linguistics (HLT/ACL)},
title = {Learning Bilingual Lexicons from Monolingual Corpora},
year = {2008},
}
The learning of probabilistic models with many hidden variables
and non-decomposable dependencies is an important and challenging problem.
In contrast to traditional approaches based on approximate inference in a single
intractable model, our approach is to train a set of tractable
submodels by encouraging them to agree on the hidden variables. This allows
us to capture non-decomposable aspects of the data while still maintaining
tractability. We propose an objective function for our approach,
derive EM-style algorithms for parameter estimation, and demonstrate their
effectiveness on three challenging real-world learning tasks.
Setting: unsupervised learning.
Alternative to approximate inference: make two tractable models and train them to agree.
Advantage: maintain existing tractable inference procedures as black-boxes.
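A toy instance under my own assumed setup, not the paper's experiments: two diagonal-Gaussian mixtures over two views of the same items, trained to agree on the shared cluster variable. The E-step scores each cluster by the product of the two submodels' joint probabilities, and the M-step refits each submodel from the shared posterior.

import numpy as np

def log_gauss(x, mu, var):
    """Log density of a diagonal Gaussian, summed over dimensions."""
    return -0.5 * (np.log(2 * np.pi * var) + (x - mu) ** 2 / var).sum(axis=-1)

def product_em(x1, x2, K=2, iters=50, seed=0):
    """Train two diagonal-Gaussian mixtures (one per view) to agree on z."""
    rng = np.random.default_rng(seed)
    n = len(x1)
    pi1 = np.full(K, 1.0 / K); mu1 = x1[rng.choice(n, K, replace=False)].copy(); var1 = np.ones_like(mu1)
    pi2 = np.full(K, 1.0 / K); mu2 = x2[rng.choice(n, K, replace=False)].copy(); var2 = np.ones_like(mu2)
    for _ in range(iters):
        # E-step: posterior over z proportional to the PRODUCT of both submodels
        logq = (np.log(pi1) + np.stack([log_gauss(x1, mu1[k], var1[k]) for k in range(K)], axis=1)
                + np.log(pi2) + np.stack([log_gauss(x2, mu2[k], var2[k]) for k in range(K)], axis=1))
        q = np.exp(logq - logq.max(axis=1, keepdims=True))
        q /= q.sum(axis=1, keepdims=True)
        # M-step: re-estimate each submodel from the shared posterior q
        for x, pi, mu, var in ((x1, pi1, mu1, var1), (x2, pi2, mu2, var2)):
            Nk = q.sum(axis=0)
            pi[:] = Nk / n
            mu[:] = (q.T @ x) / Nk[:, None]
            var[:] = (q.T @ x ** 2) / Nk[:, None] - mu ** 2 + 1e-6
    return q

rng = np.random.default_rng(1)
z = rng.integers(0, 2, 200)
x1 = rng.normal(z[:, None] * 4.0 - 2.0, 1.0, (200, 3))   # view 1
x2 = rng.normal(z[:, None] * 4.0 - 2.0, 1.0, (200, 2))   # view 2, different features
acc = (product_em(x1, x2).argmax(axis=1) == z).mean()
print(max(acc, 1 - acc))                                  # cluster/label agreement up to swap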
@inproceedings{liang08agreement,
author = {Percy Liang and Dan Klein and Michael I. Jordan},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {Agreement-Based Learning},
year = {2008},
}
We present a probabilistic approach to language change in which word forms are
represented by phoneme sequences that undergo stochastic edits along the
branches of a phylogenetic tree. This framework combines the
advantages of the classical comparative method with the robustness
of corpus-based probabilistic models. We use this framework to
explore the consequences of two different schemes for defining
probabilistic models of phonological change, evaluating these
schemes by reconstructing ancient word forms of Romance languages.
The result is an efficient inference procedure for automatically
inferring ancient word forms from modern languages, which can be
generalized to support inferences about linguistic phylogenies.
Feature-based generative model of phonemes of words in a phylogeny of languages.
@inproceedings{bouchard08language,
author = {Alexandre Bouchard-Côté and Percy Liang and Tom Griffiths and Dan Klein},
booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
title = {A Probabilistic Approach to Language Change},
year = {2008},
}
@inproceedings{liang07tutorial,
author = {Percy Liang and Dan Klein},
booktitle = {Association for Computational Linguistics (ACL)},
title = {Structured {B}ayesian Nonparametric Models with Variational Inference (tutorial)},
year = {2007},
}
We introduce a new inference algorithm for Dirichlet process mixture
models. While Gibbs sampling and variational methods focus on local
moves, the new algorithm makes more global moves. This is done by
introducing a permutation of the data points as an auxiliary variable.
The algorithm is a blocked sampler which alternates between sampling the
clustering and sampling the permutation. The key to the efficiency of
this approach is that it is possible to use dynamic programming to
consider all exponentially many clusterings consistent with a given
permutation. We also show that random projections can be used to
effectively sample the permutation. The result is a stochastic
hill-climbing algorithm that yields burn-in times significantly
smaller than those of collapsed Gibbs sampling.
Task: clustering.
Idea: conditioned on a permutation of the data points, every clustering consistent with that permutation is a partition into contiguous blocks, and dynamic programming can sum over all of them (sketched below).
Treating the permutation as a random auxiliary variable then yields a blocked sampler that alternates between sampling the clustering and sampling the permutation.
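A minimal sketch of that dynamic program (illustrative code, not the paper's implementation): with the points laid out in a fixed order, all clusterings into contiguous blocks can be summed over using O(n^2) block evaluations. Here block_score(i, j) is an assumed callable returning the unnormalized marginal likelihood of grouping points i..j-1 into one cluster, including any prior weight for opening a new cluster.

    # f[i] = total score of all contiguous-block clusterings of points 0..i-1
    def partition_sum(n, block_score):
        f = [0.0] * (n + 1)
        f[0] = 1.0
        for i in range(1, n + 1):
            f[i] = sum(f[j] * block_score(j, i) for j in range(i))
        return f[n]

Sampling a clustering given the permutation amounts to backtracking through the table f; the outer sampler alternates this with resampling the permutation.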
@inproceedings{liang07permdp,
author = {Percy Liang and Michael I. Jordan and Ben Taskar},
booktitle = {International Conference on Machine Learning (ICML)},
title = {A permutation-augmented sampler for {D}irichlet process mixture models},
year = {2007},
}
We present a nonparametric Bayesian model of tree structures based on the
hierarchical Dirichlet process (HDP). Our HDP-PCFG model allows the complexity
of the grammar to grow as more training data is available. In addition to
presenting a fully Bayesian model for the PCFG, we also develop an efficient
variational inference procedure. On synthetic data, we recover the correct
grammar without having to specify its complexity in advance. We also show that
our techniques can be applied to full-scale parsing applications by
demonstrating their effectiveness in learning state-split grammars.
A PCFG with an infinite number of states.
Learning: variational inference.
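Roughly, and in our notation rather than a faithful reproduction of the paper's, the construction draws an unbounded symbol inventory by stick-breaking:

    \beta \sim \mathrm{GEM}(\alpha) \quad \text{(top-level weights over infinitely many grammar symbols)}
    \phi_z^{E} \sim \mathrm{Dirichlet}, \qquad \phi_z^{B} \sim \mathrm{DP}(\alpha', \beta\beta^{\top}) \quad \text{for each symbol } z

so each symbol's binary-rule distribution over pairs of child symbols is tied back to the shared top-level weights, and variational inference works with a finite truncation of the stick.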
@inproceedings{liang07infpcfg,
author = {Percy Liang and Slav Petrov and Michael I. Jordan and Dan Klein},
booktitle = {Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP/CoNLL)},
title = {The Infinite {PCFG} using Hierarchical {D}irichlet Processes},
year = {2007},
}
We present a probabilistic model of diachronic phonology in which individual
word forms undergo stochastic edits along the branches of a phylogenetic tree.
Our approach allows us to achieve three goals with a single unified
model: (1) reconstruction of both ancient and modern word forms, (2) discovery
of general phonological changes, and (3) selection among different
phylogenies. We learn our model using a Monte Carlo EM algorithm and present
quantitative results validating the model.
Generative model of phonemes of words in a phylogeny of languages.
@inproceedings{bouchard07diachronic,
author = {Alexandre Bouchard-Côté and Percy Liang and Tom Griffiths and Dan Klein},
booktitle = {Empirical Methods in Natural Language Processing and Computational Natural Language Learning (EMNLP/CoNLL)},
title = {A Probabilistic Approach to Diachronic Phonology},
year = {2007},
}
We present a perceptron-style discriminative approach to machine
translation in which large feature sets can be exploited. Unlike
discriminative reranking approaches, our system can take advantage of learned
features in all stages of decoding. We first discuss several challenges to
error-driven discriminative approaches. In particular, we explore different
ways of updating parameters given a training example. We find that making
frequent but smaller updates is preferable to making fewer but larger updates.
Then, we discuss an array of features and show both how they quantitatively
increase BLEU score and how they qualitatively interact on specific examples.
One particular feature we investigate is a novel way to introduce learning into
the initial phrase extraction process, which has previously been entirely
heuristic.
Task: machine translation.
Idea: treat machine translation as a structured classification task (learn a mapping from input sentence to output sentence). Use a perceptron-like algorithm: decode, then update towards the maximum-BLEU translation on the n-best list (sketched below).
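A sketch of that update (hypothetical code; the feature and sentence-BLEU functions are stand-ins, not the paper's implementation):

    def dot(weights, feats):
        return sum(weights.get(k, 0.0) * v for k, v in feats.items())

    # One perceptron-style update: move toward the highest-BLEU candidate on
    # the n-best list and away from the current model's best candidate.
    def perceptron_update(weights, features, nbest, bleu, eta=1.0):
        model_best = max(nbest, key=lambda y: dot(weights, features(y)))
        oracle = max(nbest, key=bleu)
        for k, v in features(oracle).items():
            weights[k] = weights.get(k, 0.0) + eta * v
        for k, v in features(model_best).items():
            weights[k] = weights.get(k, 0.0) - eta * v
        return weights

Making many such small per-example updates, rather than a few large ones, is the regime the paper reports as preferable.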
@inproceedings{liang06discrimative,
author = {Percy Liang and Alexandre Bouchard-Côté and Dan Klein and Ben Taskar},
booktitle = {International Conference on Computational Linguistics and Association for Computational Linguistics (COLING/ACL)},
title = {An End-to-End Discriminative Approach to Machine Translation},
year = {2006},
}
We present an unsupervised approach to symmetric
word alignment in which two simple asymmetric models are
trained jointly to maximize a
combination of data likelihood and agreement between the models.
Compared to the standard practice of intersecting predictions of
independently-trained models, joint training provides a 32% reduction
in AER. Moreover, a simple and efficient pair of HMM aligners
provides a 29% reduction in AER over symmetrized IBM model 4
predictions.
Task: unsupervised word alignment.
Idea: Jointly train two HMM models (one in each direction) to encourage agreement. Uses a simple EM-like algorithm for training.
Result: performance competitive with supervised methods (4.9 AER on Hansards).
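A hedged sketch of the kind of objective involved (our notation): with two asymmetric models p_1 and p_2 over alignments z of a sentence pair x, the criterion combines each model's likelihood with a term rewarding agreement of their posteriors,

    \max_{\theta_1, \theta_2} \; \sum_{x} \Big[ \log p_1(x; \theta_1) + \log p_2(x; \theta_2) + \log \sum_{z} p_1(z \mid x; \theta_1)\, p_2(z \mid x; \theta_2) \Big],

and the EM-like algorithm uses an E-step distribution proportional to the product of the two models' posteriors.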
@inproceedings{liang06alignment,
author = {Percy Liang and Ben Taskar and Dan Klein},
booktitle = {North American Association for Computational Linguistics (NAACL)},
pages = {104--111},
title = {Alignment by Agreement},
year = {2006},
}
Task: named-entity recognition and Chinese word segmentation.
Idea: build features from unlabeled data and use them in perceptron learning with Markov or semi-Markov models.
@mastersthesis{liang05meng,
author = {Percy Liang},
school = {Massachusetts Institute of Technology},
title = {Semi-Supervised Learning for Natural Language},
year = {2005},
}
We introduce the first definition of hyperacyclicity for hypergraphs, a generalization of acyclicity in graphs.
We provide a dynamic data structure for maintaining hyperacyclicity, a generalization of Tarjan's Union-Find algorithm.
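For orientation, the ordinary graph case that this generalizes, maintaining acyclicity as edges arrive, is handled by standard Union-Find (a sketch of that baseline only; the hypergraph data structure in the report is more involved):

    # Adding edge (u, v) keeps the graph acyclic iff u and v are in
    # different components; path compression + union by rank.
    class UnionFind:
        def __init__(self, n):
            self.parent = list(range(n))
            self.rank = [0] * n

        def find(self, u):
            while self.parent[u] != u:
                self.parent[u] = self.parent[self.parent[u]]
                u = self.parent[u]
            return u

        def add_edge_if_acyclic(self, u, v):
            ru, rv = self.find(u), self.find(v)
            if ru == rv:
                return False      # edge would close a cycle
            if self.rank[ru] < self.rank[rv]:
                ru, rv = rv, ru
            self.parent[rv] = ru
            if self.rank[ru] == self.rank[rv]:
                self.rank[ru] += 1
            return True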
@techreport{liang05hypercycle,
author = {Percy Liang and Nathan Srebro},
institution = {Massachusetts Institute of Technology},
title = {A Data Structure for Maintaining Acyclicity in Hypergraphs},
year = {2005},
}
@inproceedings{liang05mcmaster,
author = {Percy Liang and Nathan Srebro},
booktitle = {Mathematical Programming for Data Mining and Machine Learning Workshop at McMaster University},
title = {Linear Programming in Bounded Tree-width {M}arkov Networks},
year = {2005},
}
In parsing sequences with dynamic programming, the subproblems are contiguous subsequences (quadratic in the number of terminals). In parsing documents or images, the subproblems would be subsets of the terminals (exponential in the number of terminals). We introduce (and unify) several ways to constrain these subsets using the geometric structure of the terminals.
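The gap in subproblem counts is easy to see concretely (a toy illustration, not taken from the paper):

    n = 20
    num_spans = n * (n + 1) // 2   # contiguous subsequences (CKY-style): 210
    num_subsets = 2 ** n           # unconstrained subsets of terminals: 1,048,576
    print(num_spans, num_subsets)

Geometric constraints aim to cut the second number down toward something tractable.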
@inproceedings{liang05geometric,
author = {Percy Liang and Mukund Narasimhan and Michael Shilman and Paul Viola},
booktitle = {International Conference on Document Analysis and Recognition (ICDAR)},
title = {Efficient Geometric Algorithms for Parsing in Two Dimensions},
year = {2005},
}
Use a greedy procedure to find the maximum-likelihood (or MDL) bounded tree-width Markov network (for tree-width 1, this is equivalent to Chow-Liu maximum spanning trees; sketched below).
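A minimal sketch of the tree-width-1 special case named above (Chow-Liu), assuming discrete data; the plain Kruskal implementation and names are ours:

    from collections import Counter
    from math import log

    def mutual_information(xs, ys):
        n = len(xs)
        px, py, pxy = Counter(xs), Counter(ys), Counter(zip(xs, ys))
        return sum((c / n) * log((c / n) / ((px[a] / n) * (py[b] / n)))
                   for (a, b), c in pxy.items())

    # Chow-Liu: maximum-weight spanning tree under pairwise mutual information.
    def chow_liu_tree(data):
        d = len(data[0])
        cols = list(zip(*data))
        edges = sorted(((mutual_information(cols[i], cols[j]), i, j)
                        for i in range(d) for j in range(i + 1, d)), reverse=True)
        parent = list(range(d))
        def find(u):
            while parent[u] != u:
                parent[u] = parent[parent[u]]
                u = parent[u]
            return u
        tree = []
        for w, i, j in edges:       # greedily add highest-MI edges (Kruskal)
            ri, rj = find(i), find(j)
            if ri != rj:
                parent[ri] = rj
                tree.append((i, j, w))
        return tree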
@techreport{liang04markov,
author = {Percy Liang and Nathan Srebro},
institution = {Massachusetts Institute of Technology},
title = {Methods and Experiments With Bounded Tree-width {M}arkov Networks},
year = {2004},
}
Use linear programming to find worst-case inputs to a dynamic program in order to explore the tightness of a bound for approximating maximum-weight hypertrees with windmill farms.
@techreport{liang03maxwmfarm,
author = {Percy Liang and Nathan Srebro},
institution = {Massachusetts Institute of Technology},
title = {How Much Of A Hypertree Can Be Captured By Windmills?},
year = {2003},
}