COS598C Spring 2014: Scene Understanding

Overview:

This class lays the foundation for research on scene understanding in computer vision by focusing on important topics from a practical point of view. It will review popular approaches and discuss the fundamental principles underlying scene understanding in computer vision. We will read a mixture of papers from computer vision and influential works from cognitive psychology. We will also emphasize implementation techniques that leverage computational power, crowdsourcing, and big data for computer vision research in general.

Schedule:

DateTopicPresenterSlide + CodeReading
Feb 3 MonIntroduction + Camera ModelJianxiong Xiao pptx pdf panorama
@book{HZ,
  author = {Hartley, Richard and Zisserman, Andrew},
  title = {Multiple view geometry in computer vision},
  volume = {2},
  publisher = {Cambridge Univ Press},
  year = {2000},
}
@article{SingleViewMetrology,
  author = {Criminisi, Antonio and Reid, Ian and Zisserman, Andrew},
  title = {Single view metrology},
  journal = {International Journal of Computer Vision},
  volume = {40},
  number = {2},
  pages = {123--148},
  publisher = {Springer},
  year = {2000},
}
@article{ObjectPerspective,
  author = {Hoiem, Derek and Efros, Alexei A and Hebert, Martial},
  title = {Putting objects in perspective},
  journal = {International Journal of Computer Vision},
  volume = {80},
  number = {1},
  pages = {3--15},
  publisher = {Springer},
  year = {2008},
}
@inproceedings{LabelMe3D,
  author = {Russell, Bryan C and Torralba, Antonio},
  title = {Building a database of 3d scenes from user annotations},
  booktitle = {Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE Conference on},
  pages = {2711--2718},
  organization = {IEEE},
  year = {2009},
}
Feb 5 WedClass Canceled (Severe Weather)
Feb 10 MonLinear Algebra Review + Two View GeometryFisher Yu key

pdf

[SFMedu code]

[Direct code]

[Consistency code]
% Readings: multi-view geometry and structure from motion.
@book{HZ,
  author = {Hartley, Richard and Zisserman, Andrew},
  title = {Multiple view geometry in computer vision},
  volume = {2},
  publisher = {Cambridge Univ Press},
  year = {2000},
}
% ACM Transactions on Graphics is a journal, so this is an @article with a journal field.
@article{PhotoTourism,
  author = {Snavely, Noah and Seitz, Steven M and Szeliski, Richard},
  title = {Photo tourism: exploring photo collections in {3D}},
  journal = {ACM Transactions on Graphics (TOG)},
  volume = {25},
  number = {3},
  pages = {835--846},
  publisher = {ACM},
  year = {2006},
}
@article{QuasiDense,
  author = {Lhuillier, Maxime and Quan, Long},
  title = {A quasi-dense approach to surface reconstruction from uncalibrated images},
  journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume = {27},
  number = {3},
  pages = {418--433},
  publisher = {IEEE},
  year = {2005},
}
% The literal lowercase token "and others" is what BibTeX renders as "et al.".
@misc{ceres-solver,
  author = {Agarwal, Sameer and Mierle, Keir and others},
  title = {{Ceres} {Solver}},
  howpublished = {\url{https://code.google.com/p/ceres-solver/}},
}
Feb 12 WedStructure From Motion + Stereo MatchingFisher Yu
@article{PMVS,
  author = {Furukawa, Yasutaka and Ponce, Jean},
  title = {Accurate, dense, and robust multiview stereopsis},
  journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume = {32},
  number = {8},
  pages = {1362--1376},
  publisher = {IEEE},
  year = {2010},
}
Feb 17 MonFactorization for SFM + Non-rigid SFM + Direct Method for RGBD Fisher Yu
@inproceedings{Nonrigid3D,
  author = {Bregler, Christoph and Hertzmann, Aaron and Biermann, Henning},
  title = {Recovering non-rigid {3D} shape from image streams},
  booktitle = {Computer Vision and Pattern Recognition, 2000. Proceedings. IEEE Conference on},
  volume = {2},
  pages = {690--696},
  organization = {IEEE},
  year = {2000},
}
@article{NonrigidSFM,
  author = {Torresani, Lorenzo and Hertzmann, Aaron and Bregler, Christoph},
  title = {Nonrigid structure-from-motion: Estimating shape and motion with hierarchical priors},
  journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume = {30},
  number = {5},
  pages = {878--892},
  publisher = {IEEE},
  year = {2008},
}
% The venue belongs in booktitle, which @inproceedings requires; organization is for the sponsor.
@inproceedings{DirectMethod,
  author = {Kerl, Christian and Sturm, J{\"u}rgen and Cremers, Daniel},
  title = {Robust odometry estimation for {RGB-D} cameras},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2013},
}
@inproceedings{DirectMethodICCV,
  author = {Steinbr{\"u}cker, Frank and Sturm, J{\"u}rgen and Cremers, Daniel},
  title = {Real-Time Visual Odometry from Dense {RGB-D} Images},
  booktitle = {Workshop on Live Dense Reconstruction with Moving Cameras at the Intl. Conf. on Computer Vision (ICCV)},
  year = {2011},
  keywords = {dense visual odometry,rgb-d,rgb-d benchmark},
}
Feb 19 WedKinect FusionSema Berkiten pdf

key

[KinFu code]

[SUN3Dsfm code]

[SiftFu code]

% Readings: KinectFusion, ICP variants, and dense RGB-D SLAM.
@inproceedings{KinectFusion,
  author = {Newcombe, Richard A and Davison, Andrew J and Izadi, Shahram and Kohli, Pushmeet and Hilliges, Otmar and Shotton, Jamie and Molyneaux, David and Hodges, Steve and Kim, David and Fitzgibbon, Andrew},
  title = {KinectFusion: Real-time dense surface mapping and tracking},
  booktitle = {Mixed and augmented reality (ISMAR), 2011 10th IEEE international symposium on},
  pages = {127--136},
  organization = {IEEE},
  year = {2011},
}
@inproceedings{EfficientICP,
  author = {Rusinkiewicz, Szymon and Levoy, Marc},
  title = {Efficient variants of the ICP algorithm},
  booktitle = {3-D Digital Imaging and Modeling, 2001. Proceedings. Third International Conference on},
  pages = {145--152},
  organization = {IEEE},
  year = {2001},
}
% month uses the unquoted three-letter macro so styles can localise it.
@inproceedings{GeneralizedICP,
  author = {A. Segal and D. Haehnel and S. Thrun},
  title = {{Generalized-ICP}},
  booktitle = {Proceedings of Robotics: Science and Systems},
  address = {Seattle, USA},
  month = jun,
  year = {2009},
}
@article{LargeKinectFusion,
  author = {Chen, Jiawen and Bautembach, Dennis and Izadi, Shahram},
  title = {Scalable real-time volumetric surface reconstruction},
  journal = {ACM Transactions on Graphics (TOG)},
  volume = {32},
  number = {4},
  pages = {113},
  publisher = {ACM},
  year = {2013},
}
% Not a journal paper: typed as a technical report.
% NOTE(review): institution and report number are supplied from memory; confirm against the MIT CSAIL listing.
@techreport{Kintinuous,
  author = {Whelan, Thomas and Kaess, Michael and Fallon, Maurice and Johannsson, Hordur and Leonard, John and McDonald, John},
  title = {{Kintinuous}: Spatially extended {KinectFusion}},
  institution = {Massachusetts Institute of Technology, Computer Science and Artificial Intelligence Laboratory},
  number = {MIT-CSAIL-TR-2012-020},
  year = {2012},
}
@inproceedings{KintinuousLoop,
  author = {Whelan, Thomas and Kaess, Michael and Leonard, John J and McDonald, John},
  title = {Deformation-based loop closure for large scale dense {RGB-D} {SLAM}},
  booktitle = {IEEE/RSJ Intl. Conf. on Intelligent Robots and Systems},
  year = {2013},
}
@inproceedings{KintinuousRobust,
  author = {Whelan, Thomas and Johannsson, Hordur and Kaess, Michael and Leonard, John J and McDonald, John},
  title = {Robust real-time visual odometry for dense RGB-D mapping},
  booktitle = {IEEE Intl. Conf. on Robotics and Automation, ICRA, Karlsruhe, Germany},
  year = {2013},
}
@article{NonRigid,
  author = {Li, Hao and Adams, Bart and Guibas, Leonidas J. and Pauly, Mark},
  title = {Robust Single-View Geometry And Motion Reconstruction},
  journal = {ACM Transactions on Graphics (Proceedings SIGGRAPH Asia 2009)},
  volume = {28},
  number = {5},
  month = dec,
  year = {2009},
  location = {Yokohama, Japan},
  publisher = {ACM},
  paddress = {New York, NY, USA},
}
@article{SelfPortraits,
  author = {Li, Hao and Vouga, Etienne and Gudym, Anton and Luo, Linjie and Barron, Jonathan T. and Gusev, Gleb},
  title = {3D Self-Portraits},
  journal = {ACM Transactions on Graphics (Proceedings SIGGRAPH Asia 2013)},
  volume = {32},
  number = {6},
  month = nov,
  year = {2013},
  publisher = {ACM},
  paddress = {New York, NY, USA},
}
@inproceedings{KeyFrameFusion,
  author = {Meilland, Maxime and Comport, Andrew I},
  title = {On unifying key-frame and voxel-based dense visual SLAM at large scales},
  booktitle = {Intelligent Robots and Systems (IROS), 2013 IEEE/RSJ International Conference on},
  pages = {3677--3683},
  organization = {IEEE},
  year = {2013},
}
@inproceedings{HDRslam,
  author = {Meilland, Maxime and Barat, Christian and Comport, Andrew},
  title = {3D High Dynamic Range dense visual SLAM and its application to real-time object re-lighting},
  booktitle = {Mixed and Augmented Reality (ISMAR), 2013 IEEE International Symposium on},
  pages = {143--152},
  organization = {IEEE},
  year = {2013},
}
% Conference paper; @article would require a journal.
% NOTE(review): venue and year are supplied from memory; confirm against the ICRA 2013 proceedings.
@inproceedings{SuperResolutionSLAM,
  author = {Meilland, Maxime and Comport, Andrew I},
  title = {Super-Resolution {3D} Tracking and Mapping},
  booktitle = {IEEE International Conference on Robotics and Automation (ICRA)},
  year = {2013},
}
% Conference paper. NOTE(review): venue and year are supplied from memory; confirm against the ICCV 2013 proceedings.
@inproceedings{Elastic,
  author = {Zhou, Qian-Yi and Miller, Stephen and Koltun, Vladlen},
  title = {Elastic Fragments for Dense Scene Reconstruction},
  booktitle = {IEEE International Conference on Computer Vision (ICCV)},
  year = {2013},
}
Feb 24 MonConvolutional Neural NetworkZhirong Wu pdf

[Jianxiong's note]

[Matlab Demo]

[Web Demo]

[Alex Code]

[Caffe Code]
% Readings: convolutional neural networks and representation learning.
% Unpublished note with no journal, so @misc rather than @article.
@misc{CNNnote,
  author = {Bouvrie, Jake},
  title = {Notes on convolutional neural networks},
  year = {2006},
}
@article{ParallelCognition,
  author = {McClelland, James L and Rogers, Timothy T},
  title = {The parallel distributed processing approach to semantic cognition},
  journal = {Nature Reviews Neuroscience},
  volume = {4},
  number = {4},
  pages = {310--322},
  publisher = {Nature Publishing Group},
  year = {2003},
}
% Chapter in an edited volume: the volume title is the booktitle.
% NOTE(review): publisher is supplied from memory; confirm.
@incollection{Connectionist,
  author = {Rumelhart, David E and Todd, Peter M},
  title = {Learning and connectionist representations},
  booktitle = {Attention and performance XIV: Synergies in experimental psychology, artificial intelligence, and cognitive neuroscience},
  pages = {3--30},
  publisher = {MIT Press},
  year = {1993},
}
@inproceedings{DCNN,
  author = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E},
  title = {{ImageNet} classification with deep convolutional neural networks},
  booktitle = {Advances in Neural Information Processing Systems 25},
  pages = {1106--1114},
  year = {2012},
}
@article{LecunNet,
  author = {LeCun, Yann and Boser, Bernhard and Denker, John S and Henderson, Donnie and Howard, Richard E and Hubbard, Wayne and Jackel, Lawrence D},
  title = {Backpropagation applied to handwritten zip code recognition},
  journal = {Neural computation},
  volume = {1},
  number = {4},
  pages = {541--551},
  publisher = {MIT Press},
  year = {1989},
}
% arXiv preprint: the identifier lives in eprint/archiveprefix, not in journal.
@misc{BestCNN,
  author = {Zeiler, Matthew D and Fergus, Rob},
  title = {Visualizing and Understanding Convolutional Neural Networks},
  eprint = {1311.2901},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/1311.2901},
  year = {2013},
}
@misc{Caffe,
  author = {Yangqing Jia},
  title = {{Caffe}: An Open Source Convolutional Architecture for Fast Feature Embedding},
  howpublished = {http://caffe.berkeleyvision.org/},
  year = {2013},
}
@inproceedings{DeepDetection,
  author = {Szegedy, Christian and Toshev, Alexander and Erhan, Dumitru},
  title = {Deep Neural Networks for Object Detection},
  booktitle = {Advances in Neural Information Processing Systems},
  pages = {2553--2561},
  year = {2013},
}
% NOTE(review): journal, volume, number and pages are supplied from memory; confirm against IEEE Xplore.
@article{BengioRepresentation,
  author = {Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
  title = {Representation learning: A review and new perspectives},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {35},
  number = {8},
  pages = {1798--1828},
  publisher = {IEEE},
  year = {2013},
}
Feb 26 WedAutoencoderDavid Dohan pptx

pdf

[Autoencoder Code]

[RBM code]

[DBM code]
@article{AutoEncoder,
  author = {Hinton, Geoffrey E and Salakhutdinov, Ruslan R},
  title = {Reducing the dimensionality of data with neural networks},
  journal = {Science},
  volume = {313},
  number = {5786},
  pages = {504--507},
  publisher = {American Association for the Advancement of Science},
  year = {2006},
}
Mar 3 MonRBM + DBM + DBNDavid Dohan
@inproceedings{RBM,
  author = {Salakhutdinov, Ruslan and Mnih, Andriy and Hinton, Geoffrey},
  title = {Restricted Boltzmann machines for collaborative filtering},
  booktitle = {Proceedings of the 24th international conference on Machine learning},
  pages = {791--798},
  organization = {ACM},
  year = {2007},
}
@inproceedings{DBM,
  author = {Salakhutdinov, Ruslan and Hinton, Geoffrey E},
  title = {Deep boltzmann machines},
  booktitle = {International Conference on Artificial Intelligence and Statistics},
  pages = {448--455},
  year = {2009},
}
@article{DBN,
  author = {Hinton, Geoffrey E and Osindero, Simon and Teh, Yee-Whye},
  title = {A fast learning algorithm for deep belief nets},
  journal = {Neural computation},
  volume = {18},
  number = {7},
  pages = {1527--1554},
  publisher = {MIT Press},
  year = {2006},
}
Mar 5 WedVision and Action: Reinforcement + Apprenticeship LearningChenyi Chen pdf

pptx

[demo]
% Readings: reinforcement learning and apprenticeship learning.
% arXiv preprint: the identifier lives in eprint/archiveprefix, not in journal.
@misc{DeepRL,
  author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Graves, Alex and Antonoglou, Ioannis and Wierstra, Daan and Riedmiller, Martin},
  title = {Playing {Atari} with Deep Reinforcement Learning},
  eprint = {1312.5602},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/1312.5602},
  year = {2013},
}
@inproceedings{ApprenticeshipLearning,
  author = {Abbeel, Pieter and Ng, Andrew Y},
  title = {Apprenticeship learning via inverse reinforcement learning},
  booktitle = {Proceedings of the twenty-first international conference on Machine learning},
  pages = {1},
  organization = {ACM},
  year = {2004},
  url = {http://ai.stanford.edu/~pabbeel/irl/},
}
Mar 10 MonGPU ProgrammingMaciej Halber pdf

key

[example code]
CUDA C Programming Guide
GPU Programming in MATLAB
GPUmat
Mar 12 WedMRF + CRF + GC + LBPHuiwen Chang pdf

pptx

[BP Code]

[GraphCut Code gco]

[MRFsfm]
% Readings: Markov/conditional random fields, graph cuts, belief propagation.
% Book chapter: the book title is the booktitle.
% NOTE(review): chapter number, page range and publisher are supplied from memory; confirm.
@incollection{BP,
  author = {Yedidia, Jonathan S and Freeman, William T and Weiss, Yair},
  title = {Understanding belief propagation and its generalizations},
  booktitle = {Exploring artificial intelligence in the new millennium},
  chapter = {8},
  pages = {239--269},
  publisher = {Morgan Kaufmann},
  year = {2003},
}
@article{GraphCut,
  author = {Boykov, Yuri and Veksler, Olga and Zabih, Ramin},
  title = {Fast approximate energy minimization via graph cuts},
  journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume = {23},
  number = {11},
  pages = {1222--1239},
  publisher = {IEEE},
  year = {2001},
}
@techreport{DistanceTransform,
  author = {Felzenszwalb, Pedro and Huttenlocher, Daniel},
  title = {Distance transforms of sampled functions},
  institution = {Cornell University},
  year = {2004},
}
@article{LazySnapping,
  author = {Li, Yin and Sun, Jian and Tang, Chi-Keung and Shum, Heung-Yeung},
  title = {Lazy snapping},
  journal = {ACM Transactions on Graphics (ToG)},
  volume = {23},
  number = {3},
  pages = {303--308},
  publisher = {ACM},
  year = {2004},
}
% arXiv preprint: the identifier lives in eprint/archiveprefix, not in journal.
@misc{ConnectedCRF,
  author = {Kr{\"a}henb{\"u}hl, Philipp and Koltun, Vladlen},
  title = {Efficient inference in fully connected {CRFs} with {Gaussian} edge potentials},
  eprint = {1210.5644},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/1210.5644},
  year = {2012},
}
@inproceedings{MRFsfm,
  author = {David Crandall and Andrew Owens and Noah Snavely and Daniel P. Huttenlocher},
  title = {Discrete-Continuous Optimization for Large-Scale Structure from Motion},
  booktitle = {Proc. IEEE Conf. on Computer Vision and Pattern Recognition},
  year = {2011},
}
% year must be numeric; a publication-status remark would go in note.
% NOTE(review): year, volume, number and pages are supplied from memory; confirm against IEEE Xplore.
@article{MRFsfmPAMI,
  author = {David Crandall and Andrew Owens and Noah Snavely and Daniel Huttenlocher},
  title = {{SfM with MRFs}: Discrete-Continuous Optimization for Large-Scale Reconstruction},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {35},
  number = {12},
  pages = {2841--2853},
  year = {2013},
}
@article{EfficientBP,
  author = {Felzenszwalb, Pedro F and Huttenlocher, Daniel P},
  title = {Efficient belief propagation for early vision},
  journal = {International journal of computer vision},
  volume = {70},
  number = {1},
  pages = {41--54},
  publisher = {Springer},
  year = {2006},
}
@incollection{TextonBoost,
  author = {Shotton, Jamie and Winn, John and Rother, Carsten and Criminisi, Antonio},
  title = {Textonboost: Joint appearance, shape and context modeling for multi-class object recognition and segmentation},
  booktitle = {Computer Vision--ECCV 2006},
  pages = {1--15},
  publisher = {Springer},
  year = {2006},
}
@inproceedings{CRFobject,
  author = {Quattoni, Ariadna and Collins, Michael and Darrell, Trevor},
  title = {Conditional random fields for object recognition},
  booktitle = {Advances in neural information processing systems},
  pages = {1097--1104},
  year = {2004},
}
Mar 17 MonNo Class (Spring Recess)
Mar 19 WedNo Class (Spring Recess)
Mar 24 MonCloud ComputingJohn McSpedon pdf

pptx

demo code

Mar 26 WedObject DetectionShuran Song pdf

pptx

[DPM code]

[Vlfeat code]

[Color SIFT code]
% Readings: object detection.
% arXiv preprint: the identifier lives in eprint/archiveprefix, not in journal.
@misc{DevaSVM,
  author = {Ramanan, Deva},
  title = {Dual coordinate solvers for large-scale structural {SVMs}},
  eprint = {1312.1743},
  archiveprefix = {arXiv},
  url = {https://arxiv.org/abs/1312.1743},
  year = {2013},
}
% NOTE(review): volume C-22 is supplied from memory (the source's 100 looks like export junk); confirm.
@article{PictorialStructure,
  author = {Fischler, Martin A and Elschlager, Robert A},
  title = {The representation and matching of pictorial structures},
  journal = {Computers, IEEE Transactions on},
  volume = {C-22},
  number = {1},
  pages = {67--92},
  publisher = {IEEE},
  year = {1973},
}
@inproceedings{DalalTriggs,
  author = {Dalal, Navneet and Triggs, Bill},
  title = {Histograms of oriented gradients for human detection},
  booktitle = {Computer Vision and Pattern Recognition, 2005. CVPR 2005. IEEE Computer Society Conference on},
  volume = {1},
  pages = {886--893},
  organization = {IEEE},
  year = {2005},
}
@article{DPM,
  author = {Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D. and Ramanan, D.},
  title = {Object Detection with Discriminatively Trained Part Based Models},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {32},
  number = {9},
  pages = {1627--1645},
  year = {2010},
}
@inproceedings{ExemplarSVMs,
  author = {Malisiewicz, Tomasz and Gupta, Abhinav and Efros, Alexei A},
  title = {Ensemble of exemplar-svms for object detection and beyond},
  booktitle = {Computer Vision (ICCV), 2011 IEEE International Conference on},
  pages = {89--96},
  organization = {IEEE},
  year = {2011},
}
@inproceedings{PartMixtures,
  author = {Yang, Yi and Ramanan, Deva},
  title = {Articulated pose estimation with flexible mixtures-of-parts},
  booktitle = {Computer Vision and Pattern Recognition (CVPR), 2011 IEEE Conference on},
  pages = {1385--1392},
  organization = {IEEE},
  year = {2011},
}
@inproceedings{Poselet,
  author = {Lubomir Bourdev and Jitendra Malik},
  title = {Poselets: Body Part Detectors Trained Using 3D Human Pose Annotations},
  booktitle = {International Conference on Computer Vision (ICCV)},
  year = {2009},
  url = {http://www.eecs.berkeley.edu/~lbourdev/poselets},
}
@article{ExemplarSVMsMatching,
  author = {Shrivastava, Abhinav and Malisiewicz, Tomasz and Gupta, Abhinav and Efros, Alexei A.},
  title = {Data-driven Visual Similarity for Cross-domain Image Matching},
  journal = {ACM Transactions on Graphics (TOG) (Proceedings of ACM SIGGRAPH ASIA)},
  volume = {30},
  number = {6},
  year = {2011},
}
@inproceedings{FindingThings,
  author = {Tighe, Joseph and Lazebnik, Svetlana},
  title = {Finding things: Image parsing with regions and per-exemplar detectors},
  booktitle = {Computer Vision and Pattern Recognition (CVPR), 2013 IEEE Conference on},
  pages = {3001--3008},
  organization = {IEEE},
  year = {2013},
}
@inproceedings{SelectiveSearch,
  author = {van de Sande, Koen EA and Uijlings, Jasper RR and Gevers, Theo and Smeulders, Arnold WM},
  title = {Segmentation as selective search for object recognition},
  booktitle = {Computer Vision (ICCV), 2011 IEEE International Conference on},
  pages = {1879--1886},
  organization = {IEEE},
  year = {2011},
}
% Conference paper; @article would require a journal.
% NOTE(review): venue and year are supplied from memory; confirm against the ICCV 2013 proceedings.
@inproceedings{Regionlets,
  author = {Wang, Xiaoyu and Yang, Ming and Zhu, Shenghuo and Lin, Yuanqing},
  title = {Regionlets for Generic Object Detection},
  booktitle = {IEEE International Conference on Computer Vision (ICCV)},
  year = {2013},
}
@inproceedings{CF,
  author = {Matikainen, Pyry and Sukthankar, Rahul and Hebert, Martial},
  title = {Model recommendation for action recognition},
  booktitle = {Computer Vision and Pattern Recognition (CVPR), 2012 IEEE Conference on},
  pages = {2256--2263},
  organization = {IEEE},
  year = {2012},
}
@incollection{LDA,
  author = {Hariharan, Bharath and Malik, Jitendra and Ramanan, Deva},
  title = {Discriminative decorrelation for clustering and classification},
  booktitle = {Computer Vision--ECCV 2012},
  pages = {459--472},
  publisher = {Springer},
  year = {2012},
}
@inproceedings{Cuboid,
  author = {Jianxiong Xiao and Bryan Russell and Antonio Torralba},
  title = {Localizing 3{D} Cuboids in Single-view Images},
  booktitle = {NIPS},
  year = {2012},
}
Mar 31 MonFeatures and DatasetsShuran Song
@article{SIFT,
  author = {Lowe, David G},
  title = {Distinctive image features from scale-invariant keypoints},
  journal = {International journal of computer vision},
  volume = {60},
  number = {2},
  pages = {91--110},
  publisher = {Springer},
  year = {2004},
}
@article{ColorSIFT,
  author = {van de Sande, K. E. A. and Gevers, T. and Snoek, C. G. M.},
  title = {Evaluating Color Descriptors for Object and Scene Recognition},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {32},
  number = {9},
  pages = {1582--1596},
  year = {2010},
  url = {http://www.science.uva.nl/research/publications/2010/vandeSandeTPAMI2010},
}
@inproceedings{DalalTriggs,
  author = {Dalal, Navneet and Triggs, Bill},
  title = {Histograms of oriented gradients for human detection},
  booktitle = {Computer Vision and Pattern Recognition, 2005. CVPR 2005. IEEE Computer Society Conference on},
  volume = {1},
  pages = {886--893},
  organization = {IEEE},
  year = {2005},
}
@article{DPM,
  author = {Felzenszwalb, P. F. and Girshick, R. B. and McAllester, D. and Ramanan, D.},
  title = {Object Detection with Discriminatively Trained Part Based Models},
  journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
  volume = {32},
  number = {9},
  pages = {1627--1645},
  year = {2010},
}
@article{GIST,
  author = {Oliva, Aude and Torralba, Antonio},
  title = {Modeling the shape of the scene: A holistic representation of the spatial envelope},
  journal = {International journal of computer vision},
  volume = {42},
  number = {3},
  pages = {145--175},
  publisher = {Springer},
  year = {2001},
}
@article{LBP,
  author = {Ojala, Timo and Pietikainen, Matti and Maenpaa, Topi},
  title = {Multiresolution gray-scale and rotation invariant texture classification with local binary patterns},
  journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume = {24},
  number = {7},
  pages = {971--987},
  publisher = {IEEE},
  year = {2002},
}
@article{PrinciplesOfCategorization,
  author = {Rosch, Eleanor},
  title = {Principles of categorization},
  journal = {Concepts: core readings},
  pages = {189--206},
  year = {1999},
}
@article{Visipedia,
  author = {Perona, Pietro},
  title = {Vision of a Visipedia},
  journal = {Proceedings of the IEEE},
  volume = {98},
  number = {8},
  pages = {1526--1534},
  publisher = {IEEE},
  year = {2010},
}
@article{SUNDB,
  author = {Jianxiong Xiao and Krista A. Ehinger and James Hays and Antonio Torralba and Aude Oliva},
  title = {SUN Database: Exploring a Large Collection of Scene Categories},
  journal = {IJCV},
  year = {2014},
}
@article{PASCAL,
  author = {Everingham, Mark and Van Gool, Luc and Williams, Christopher KI and Winn, John and Zisserman, Andrew},
  title = {The pascal visual object classes (voc) challenge},
  journal = {International journal of computer vision},
  volume = {88},
  number = {2},
  pages = {303--338},
  publisher = {Springer},
  year = {2010},
}
@inproceedings{ImageNet,
  author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
  title = {Imagenet: A large-scale hierarchical image database},
  booktitle = {Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE Conference on},
  pages = {248--255},
  organization = {IEEE},
  year = {2009},
}
@inproceedings{SUN3D,
  author = {Jianxiong Xiao and Andrew Owens and Antonio Torralba},
  title = {{SUN3D}: A Database of Big Spaces Reconstructed using SfM and Object Labels},
  booktitle = {ICCV},
  year = {2013},
  url = {http://vision.princeton.edu/projects/2013/SUN3D/paper.pdf},
}
Apr 2 WedBOW + SPM + Sparse CodingXinyi Fan pdf

key

% Readings: bag of words, spatial pyramids, feature coding and hashing.
@inproceedings{SPM,
  author = {Lazebnik, Svetlana and Schmid, Cordelia and Ponce, Jean},
  title = {Beyond bags of features: Spatial pyramid matching for recognizing natural scene categories},
  booktitle = {Computer Vision and Pattern Recognition, 2006 IEEE Computer Society Conference on},
  volume = {2},
  pages = {2169--2178},
  organization = {IEEE},
  year = {2006},
}
@inproceedings{LLC,
  author = {Wang, Jinjun and Yang, Jianchao and Yu, Kai and Lv, Fengjun and Huang, Thomas and Gong, Yihong},
  title = {Locality-constrained linear coding for image classification},
  booktitle = {Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on},
  pages = {3360--3367},
  organization = {IEEE},
  year = {2010},
}
@inproceedings{LSPM,
  author = {Yang, Jianchao and Yu, Kai and Gong, Yihong and Huang, Thomas},
  title = {Linear spatial pyramid matching using sparse coding for image classification},
  booktitle = {Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE Conference on},
  pages = {1794--1801},
  organization = {IEEE},
  year = {2009},
}
@article{FisherVector,
  author = {S{\'a}nchez, Jorge and Perronnin, Florent and Mensink, Thomas and Verbeek, Jakob},
  title = {Image Classification with the Fisher Vector: Theory and Practice},
  journal = {International Journal of Computer Vision},
  pages = {1--24},
  publisher = {Springer},
  year = {2013},
}
@incollection{FisherKernel,
  author = {Perronnin, Florent and S{\'a}nchez, Jorge and Mensink, Thomas},
  title = {Improving the fisher kernel for large-scale image classification},
  booktitle = {Computer Vision--ECCV 2010},
  pages = {143--156},
  publisher = {Springer},
  year = {2010},
}
% Conference paper; @article would require a journal.
% NOTE(review): venue is supplied from memory; confirm against the BMVC 2011 proceedings.
@inproceedings{CodingComparison,
  author = {Chatfield, Ken and Lempitsky, Victor and Vedaldi, Andrea and Zisserman, Andrew},
  title = {The devil is in the details: an evaluation of recent feature encoding methods},
  booktitle = {British Machine Vision Conference (BMVC)},
  year = {2011},
}
@inproceedings{SmallCodes,
  author = {Torralba, Antonio and Fergus, Robert and Weiss, Yair},
  title = {Small codes and large image databases for recognition},
  booktitle = {Computer Vision and Pattern Recognition, 2008. CVPR 2008. IEEE Conference on},
  pages = {1--8},
  organization = {IEEE},
  year = {2008},
}
@incollection{MultidimensionalSpectralHashing,
  author = {Weiss, Yair and Fergus, Rob and Torralba, Antonio},
  title = {Multidimensional spectral hashing},
  booktitle = {Computer Vision--ECCV 2012},
  pages = {340--353},
  publisher = {Springer},
  year = {2012},
}
@inproceedings{SpectralHashing,
  author = {Weiss, Yair and Torralba, Antonio and Fergus, Rob},
  title = {Spectral hashing},
  booktitle = {Advances in neural information processing systems},
  pages = {1753--1760},
  year = {2008},
}
@article{CompactCodes,
  author = {J{\'e}gou, Herv{\'e} and Perronnin, Florent and Douze, Matthijs and Schmid, Cordelia and others},
  title = {Aggregating local image descriptors into compact codes},
  journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume = {34},
  number = {9},
  pages = {1704--1716},
  publisher = {IEEE},
  year = {2012},
}
Apr 7 MonInstance-level MatchingPingmei Xu pdf

key

@inproceedings{VideoGoogle,
  author = {Sivic, Josef and Zisserman, Andrew},
  title = {Video Google: A text retrieval approach to object matching in videos},
  booktitle = {Computer Vision, 2003. Proceedings. Ninth IEEE International Conference on},
  pages = {1470--1477},
  organization = {IEEE},
  year = {2003},
}
@inproceedings{GoogleGoggle,
  author = {Philbin, James and Chum, Ondrej and Isard, Michael and Sivic, Josef and Zisserman, Andrew},
  title = {Object retrieval with large vocabularies and fast spatial matching},
  booktitle = {Computer Vision and Pattern Recognition, 2007. CVPR'07. IEEE Conference on},
  pages = {1--8},
  organization = {IEEE},
  year = {2007},
}
@inproceedings{Quantization,
  author = {Philbin, James and Chum, Ondrej and Isard, Michael and Sivic, Josef and Zisserman, Andrew},
  title = {Lost in quantization: Improving particular object retrieval in large scale image databases},
  booktitle = {Computer Vision and Pattern Recognition, 2008. CVPR 2008. IEEE Conference on},
  pages = {1--8},
  organization = {IEEE},
  year = {2008},
}
@inproceedings{TotalRecall,
  author = {Chum, Ondrej and Philbin, James and Sivic, Josef and Isard, Michael and Zisserman, Andrew},
  title = {Total recall: Automatic query expansion with a generative feature model for object retrieval},
  booktitle = {Computer Vision, 2007. ICCV 2007. IEEE 11th International Conference on},
  pages = {1--8},
  organization = {IEEE},
  year = {2007},
}
@article{InstanceLevelRecognition,
  author = {Rothganger, Fred and Lazebnik, Svetlana and Schmid, Cordelia and Ponce, Jean},
  title = {3d object modeling and recognition using local affine-invariant image descriptors and multi-view spatial constraints},
  journal = {International Journal of Computer Vision},
  volume = {66},
  number = {3},
  pages = {231--259},
  publisher = {Kluwer Academic Publishers},
  year = {2006},
}
@incollection{GeometricEra,
  author = {Mundy, Joseph L},
  title = {Object recognition in the geometric era: A retrospective},
  booktitle = {Toward category-level object recognition},
  pages = {3--28},
  publisher = {Springer},
  year = {2006},
}
Apr 9 WedWeb ProgrammingPingmei Xu pdf

key
w3schools.com
Apr 14 MonWebGL + Blender (Basic + Command Line Tool) Maciej Halber WebGL pdf
WebGL key
WebGL code

Blender key
Blender pdf
BlenderScript
BlenderFiles
Learning WebGL Lessons
Apr 16 WedCrowd SourcingSimin Chen pdf

pptx

[Matlab Turk API]

[DrawMe code]

[TurkCleaner code]
% Readings: crowdsourcing and human computation.
@incollection{HumanInTheLoop,
  author = {Branson, Steve and Wah, Catherine and Schroff, Florian and Babenko, Boris and Welinder, Peter and Perona, Pietro and Belongie, Serge},
  title = {Visual recognition with humans in the loop},
  booktitle = {Computer Vision--ECCV 2010},
  pages = {438--451},
  publisher = {Springer},
  year = {2010},
}
@inproceedings{Rating,
  author = {Welinder, Peter and Perona, Pietro},
  title = {Online crowdsourcing: rating annotators and obtaining cost-effective labels},
  booktitle = {Computer Vision and Pattern Recognition Workshops (CVPRW), 2010 IEEE Computer Society Conference on},
  pages = {25--32},
  organization = {IEEE},
  year = {2010},
}
@inproceedings{InteractiveTraining,
  author = {Branson, Steve and Perona, Pietro and Belongie, Serge},
  title = {Strong supervision from weak annotation: Interactive training of deformable part models},
  booktitle = {Computer Vision (ICCV), 2011 IEEE International Conference on},
  pages = {1832--1839},
  organization = {IEEE},
  year = {2011},
}
@inproceedings{Turkit,
  author = {Little, Greg and Chilton, Lydia B and Goldman, Max and Miller, Robert C},
  title = {Turkit: human computation algorithms on mechanical turk},
  booktitle = {Proceedings of the 23rd annual ACM symposium on User interface software and technology},
  pages = {57--66},
  organization = {ACM},
  year = {2010},
}
@inproceedings{ParallelHuman,
  author = {Little, Greg and Chilton, Lydia B and Goldman, Max and Miller, Robert C},
  title = {Exploring iterative and parallel human computation processes},
  booktitle = {Proceedings of the ACM SIGKDD workshop on human computation},
  pages = {68--76},
  organization = {ACM},
  year = {2010},
}
% NOTE(review): author normalised to match the other Little entries here; confirm against the thesis record.
@phdthesis{ProgrammingHuman,
  author = {Little, Greg},
  title = {Programming with human computation},
  school = {Massachusetts Institute of Technology},
  year = {2011},
}
@article{CrowdPowered,
  author = {Bernstein, Michael S},
  title = {Crowd-powered systems},
  journal = {KI-K{\"u}nstliche Intelligenz},
  volume = {27},
  number = {1},
  pages = {69--73},
  publisher = {Springer},
  year = {2013},
}
@inproceedings{ImageNet,
  author = {Deng, Jia and Dong, Wei and Socher, Richard and Li, Li-Jia and Li, Kai and Fei-Fei, Li},
  title = {Imagenet: A large-scale hierarchical image database},
  booktitle = {Computer Vision and Pattern Recognition, 2009. CVPR 2009. IEEE Conference on},
  pages = {248--255},
  organization = {IEEE},
  year = {2009},
}
Apr 21 MonScene and ContextYinda Zhang pdf

pptx

@comment{Reading list: scene layout, context, image grammars and probabilistic scene models. One field per line; citation keys are unchanged.}

@inproceedings{GeometricContext,
  author       = {Hoiem, Derek and Efros, Alexei A and Hebert, Martial},
  title        = {Geometric context from a single image},
  booktitle    = {Computer Vision, 2005. ICCV 2005. Tenth IEEE International Conference on},
  volume       = {1},
  pages        = {654--661},
  year         = {2005},
  organization = {IEEE},
}

@article{PhotoPop-up,
  author    = {Hoiem, Derek and Efros, Alexei A and Hebert, Martial},
  title     = {Automatic photo pop-up},
  journal   = {ACM Transactions on Graphics (TOG)},
  volume    = {24},
  number    = {3},
  pages     = {577--584},
  year      = {2005},
  publisher = {ACM},
}

@inproceedings{RGBDcuboid,
  author    = {Jiang, Hao and Xiao, Jianxiong},
  title     = {A Linear Approach to Matching Cuboids in {RGBD} Images},
  booktitle = {CVPR},
  year      = {2013},
}

@incollection{ExactLayout,
  author    = {Schwing, Alexander G and Urtasun, Raquel},
  title     = {Efficient exact inference for {3D} indoor scene understanding},
  booktitle = {Computer Vision--ECCV 2012},
  pages     = {299--313},
  year      = {2012},
  publisher = {Springer},
}

@inproceedings{BoxInBox,
  author       = {Schwing, Alexander G and Fidler, Sanja and Pollefeys, Marc and Urtasun, Raquel},
  title        = {Box In the Box: Joint {3D} Layout and Object Reasoning from Single Images},
  booktitle    = {Computer Vision (ICCV), 2013 IEEE International Conference on},
  year         = {2013},
  organization = {IEEE},
}

@article{ObjectPerspective,
  author    = {Hoiem, Derek and Efros, Alexei A and Hebert, Martial},
  title     = {Putting objects in perspective},
  journal   = {International Journal of Computer Vision},
  volume    = {80},
  number    = {1},
  pages     = {3--15},
  year      = {2008},
  publisher = {Springer},
}

@article{Make3D,
  author    = {Saxena, Ashutosh and Sun, Min and Ng, Andrew Y},
  title     = {{Make3D}: Learning {3D} scene structure from a single still image},
  journal   = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume    = {31},
  number    = {5},
  pages     = {824--840},
  year      = {2009},
  publisher = {IEEE},
}

@inproceedings{HallucinateHuman,
  author    = {Jiang, Yun and Koppula, Hema and Saxena, Ashutosh},
  title     = {Hallucinated Humans as the Hidden Context for Labeling {3D} Scenes},
  booktitle = {CVPR},
  year      = {2013},
}

@inproceedings{RoomLayout,
  author       = {Hedau, Varsha and Hoiem, Derek and Forsyth, David},
  title        = {Recovering the spatial layout of cluttered rooms},
  booktitle    = {Computer Vision, 2009 IEEE 12th International Conference on},
  pages        = {1849--1856},
  year         = {2009},
  organization = {IEEE},
}

@article{StochasticGrammar,
  author    = {Zhu, Song Chun and Mumford, David},
  title     = {A stochastic grammar of images},
  journal   = {Foundations and Trends in Computer Graphics and Vision},
  volume    = {2},
  number    = {4},
  pages     = {259--362},
  year      = {2007},
  publisher = {Now Publishers Inc},
}

@article{DDMCMC,
  author    = {Tu, Zhuowen and Zhu, Song-Chun},
  title     = {Image segmentation by data-driven {Markov} chain {Monte Carlo}},
  journal   = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume    = {24},
  number    = {5},
  pages     = {657--673},
  year      = {2002},
  publisher = {IEEE},
}

@article{ImageParsing,
  author    = {Tu, Zhuowen and Chen, Xiangrong and Yuille, Alan L and Zhu, Song-Chun},
  title     = {Image parsing: Unifying segmentation, detection, and recognition},
  journal   = {International Journal of Computer Vision},
  volume    = {63},
  number    = {2},
  pages     = {113--140},
  year      = {2005},
  publisher = {Springer},
}

@inproceedings{AutoContext,
  author       = {Tu, Zhuowen},
  title        = {Auto-context and its application to high-level vision tasks},
  booktitle    = {Computer Vision and Pattern Recognition, 2008. CVPR 2008. IEEE Conference on},
  pages        = {1--8},
  year         = {2008},
  organization = {IEEE},
}

@article{GrammarParsing,
  author    = {Han, Feng and Zhu, Song-Chun},
  title     = {Bottom-up/top-down image parsing with attribute grammar},
  journal   = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume    = {31},
  number    = {1},
  pages     = {59--73},
  year      = {2009},
  publisher = {IEEE},
}

@article{AndOrGraph,
  author    = {Wu, Tianfu and Zhu, Song-Chun},
  title     = {A numerical study of the bottom-up and top-down inference processes in and-or graphs},
  journal   = {International Journal of Computer Vision},
  volume    = {93},
  number    = {2},
  pages     = {226--252},
  year      = {2011},
  publisher = {Springer},
}

@article{SimulationScene,
  author    = {Battaglia, Peter W and Hamrick, Jessica B and Tenenbaum, Joshua B},
  title     = {Simulation as an engine of physical scene understanding},
  journal   = {Proceedings of the National Academy of Sciences},
  volume    = {110},
  number    = {45},
  pages     = {18327--18332},
  year      = {2013},
  publisher = {National Acad Sciences},
}

@article{GrowMind,
  author    = {Tenenbaum, Joshua B and Kemp, Charles and Griffiths, Thomas L and Goodman, Noah D},
  title     = {How to grow a mind: Statistics, structure, and abstraction},
  journal   = {Science},
  volume    = {331},
  number    = {6022},
  pages     = {1279--1285},
  year      = {2011},
  publisher = {American Association for the Advancement of Science},
  url       = {http://web.mit.edu/cocosci/Papers/tkgg-science11-reprint.pdf},
}

@inproceedings{ProbabilisticGraphics,
  author    = {Mansinghka, Vikash K and Kulkarni, Tejas D and Perov, Yura N and Tenenbaum, Joshua B},
  title     = {Approximate {Bayesian} image interpretation using generative probabilistic graphics programs},
  booktitle = {Advances in Neural Information Processing Systems},
  year      = {2013},
}
Apr 23 Wed Semantic Segmentation Bebe Shi pdf

[TextonBoost Code]

[TextonForest Code]

[SiftFlow Code]

[Label Transfer Code]

[SuperParsing Code]
@comment{Reading list: semantic segmentation and nonparametric scene parsing. One field per line; citation keys are unchanged.}

@incollection{TextonBoost,
  author    = {Shotton, Jamie and Winn, John and Rother, Carsten and Criminisi, Antonio},
  title     = {{TextonBoost}: Joint appearance, shape and context modeling for multi-class object recognition and segmentation},
  booktitle = {Computer Vision--ECCV 2006},
  pages     = {1--15},
  year      = {2006},
  publisher = {Springer},
}

@inproceedings{TextonForest,
  author       = {Shotton, Jamie and Johnson, Matthew and Cipolla, Roberto},
  title        = {Semantic texton forests for image categorization and segmentation},
  booktitle    = {Computer Vision and Pattern Recognition, 2008. CVPR 2008. IEEE Conference on},
  pages        = {1--8},
  year         = {2008},
  organization = {IEEE},
}

@incollection{SiftFlow,
  author    = {Liu, Ce and Yuen, Jenny and Torralba, Antonio and Sivic, Josef and Freeman, William T},
  title     = {{SIFT} flow: dense correspondence across different scenes},
  booktitle = {Computer Vision--ECCV 2008},
  pages     = {28--42},
  year      = {2008},
  publisher = {Springer},
}

@article{LabelTransfer,
  author    = {Liu, Ce and Yuen, Jenny and Torralba, Antonio},
  title     = {Nonparametric scene parsing via label transfer},
  journal   = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
  volume    = {33},
  number    = {12},
  pages     = {2368--2382},
  year      = {2011},
  publisher = {IEEE},
}
Apr 28 Mon Compressive Sensing Li-Fang Cheng pdf

pptx

L1 magic
@comment{Reading list: informative sensing. The arXiv identifier is stored in the eprint fields rather than in the journal field; citation keys are unchanged.}

@misc{InformativeSensingArXiv,
  author        = {Chang, Hyun Sung and Weiss, Yair and Freeman, William T},
  title         = {Informative sensing},
  year          = {2009},
  eprint        = {0901.4275},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/0901.4275},
}

@inproceedings{InformativeSensingICIP,
  author       = {Chang, Hyun Sung and Weiss, Yair and Freeman, William T},
  title        = {Informative sensing of natural images},
  booktitle    = {Image Processing (ICIP), 2009 16th IEEE International Conference on},
  pages        = {3025--3028},
  year         = {2009},
  organization = {IEEE},
}

@phdthesis{InformativeSensing,
  author = {Chang, Hyun Sung},
  title  = {Informative sensing: theory and applications},
  year   = {2012},
  school = {Massachusetts Institute of Technology},
}
Apr 30 Wed How to do research + Open Discussion Jianxiong Xiao pdf

pptx
Bill Freeman's how to do research
Bill Freeman's crowd sourced note
Ramesh Raskar's How to invent: The Idea Hexagon

Tentative Topics:

Class Requirement:

Reading List:

Resources:

Books

There is no textbook for this class. The following are just references if you are interested.

Computer vision:

Learning:

Graphical models:

Related Courses:

Computer Vision Class at Princeton

By Antonio Torralba at MIT:

By Alyosha Efros at CMU/Berkeley:

By James Hays at Brown:

By others:

Code and Datasets

Songs