QuickSearch:   Number of matching entries: 0.

Search Settings

 Hongxiao Bai, Mingxuan Wang, Hai Zhao and Lei Li, "Self-Training with Heterogeneous Teachers Improves Neural Machine Translation", 2020. BibTeX: @unpublished{bai2020self, author = {Hongxiao Bai and Mingxuan Wang and Hai Zhao and Lei Li}, title = {Self-Training with Heterogeneous Teachers Improves Neural Machine Translation}, year = {2020} }  Hongxiao Bai, Mingxuan Wang, Hai Zhao and Lei Li, "Triangular Unsupervised Neural Machine Translation", 2020. BibTeX: @unpublished{bai2020triangular, author = {Hongxiao Bai and Mingxuan Wang and Hai Zhao and Lei Li}, title = {Triangular Unsupervised Neural Machine Translation}, year = {2020} }  Yu Bao, Hao Zhou, Jiangtao Feng, Mingxuan Wang, Shujian Huang, Jiajun Chen and Lei Li, "PNAT: Non-autoregressive Transformer by Position Learning", 2020. in submission BibTeX: @unpublished{bao2020pnat, author = {Yu Bao and Hao Zhou and Jiangtao Feng and Mingxuan Wang and Shujian Huang and Jiajun Chen and Lei Li}, title = {PNAT: Non-autoregressive Transformer by Position Learning}, year = {2020}, note = {in submission} }  Qianqian Dong, Mingxuan Wang, Hao Zhou, Zhen Yang, Bo Xu and Lei Li, "Doubly Supervised Encoder for End-to-end Speech Translation", 2020. BibTeX: @unpublished{dong2020doubly, author = {Qianqian Dong and Mingxuan Wang and Hao Zhou and Zhen Yang and Bo Xu and Lei Li}, title = {Doubly Supervised Encoder for End-to-end Speech Translation}, year = {2020} }  Xunpeng Huang, Zhengyang Liu, Zhe Wang, Yue Yu and Lei Li, "Acutum: an Adaptive Angular Regularized Optimization Algorithm", 2020. BibTeX: @unpublished{huang2020acutum, author = {Xunpeng Huang and Zhengyang Liu and Zhe Wang and Yue Yu and Lei Li}, title = {Acutum: an Adaptive Angular Regularized Optimization Algorithm}, year = {2020} }  Tao Kong, Fuchun Sun, Huaping Liu, Yuning Jiang, Lei Li and Jianbo Shi, "FoveaBox: Beyound Anchor-based Object Detection", IEEE Transactions on Image Processing, pp. 1-10., 2020. 
BibTeX: @article{kong2020foveabox, author = {Tao Kong and Fuchun Sun and Huaping Liu and Yuning Jiang and Lei Li and Jianbo Shi}, title = {FoveaBox: Beyound Anchor-based Object Detection}, journal = {IEEE Transactions on Image Processing}, year = {2020}, pages = {1-10}, doi = {https://doi.org/10.1109/TIP.2020.3002345} }  Mingwei Li, Qingyuan Jiang, Yi He, Lei Li and Wujun Li, "Bidirectional Attentive Convolutional Neural Network for Near-Duplicate Video Retrieval", 2020. BibTeX: @unpublished{li2020bidirectional, author = {Mingwei Li and Qingyuan Jiang and Yi He and Lei Li and Wujun Li}, title = {Bidirectional Attentive Convolutional Neural Network for Near-Duplicate Video Retrieval}, year = {2020} }  Xiao Pan, Mingxuan Wang, Jiangtao Feng, Hao Zhou and Lei Li, "Pretraining Neural Machine Translation by Pretrained Neural Machine Translation", 2020. BibTeX: @unpublished{pan2020pretraining, author = {Xiao Pan and Mingxuan Wang and Jiangtao Feng and Hao Zhou and Lei Li}, title = {Pretraining Neural Machine Translation by Pretrained Neural Machine Translation}, year = {2020} }  Dongyu Ru, Yating Luo, Lin Qiu, Hao Zhou, Lei Li, Weinan Zhang and Yong Yu, "Active Sentence Learning by Adversarial Uncertainty Sampling in Discrete Space", 2020. BibTeX: @unpublished{ru2020active, author = {Dongyu Ru and Yating Luo and Lin Qiu and Hao Zhou and Lei Li and Weinan Zhang and Yong Yu}, title = {Active Sentence Learning by Adversarial Uncertainty Sampling in Discrete Space}, year = {2020} }  Zewei Sun, Mingxuan Wang, Hao Zhou, Chengqi Zhao, Shujian Huang, Jiajun Chen and Lei Li, "Diving into Document-Level Neural Machine Translation", 2020. 
BibTeX: @unpublished{sun2020diving, author = {Zewei Sun and Mingxuan Wang and Hao Zhou and Chengqi Zhao and Shujian Huang and Jiajun Chen and Lei Li}, title = {Diving into Document-Level Neural Machine Translation}, year = {2020} }  Youzhi Tian, Zhou Yu, Cheng Yang, Hang Li and Lei Li, "Conversational Contextualized Multimodal Representation Learning", 2020. BibTeX: @unpublished{tian2020conversational, author = {Youzhi Tian and Zhou Yu and Cheng Yang and Hang Li and Lei Li}, title = {Conversational Contextualized Multimodal Representation Learning}, year = {2020} }  Qingyang Wu, Lei Li and Zhou Yu, "TextGAIL: Generative Adversarial Imitation Learning for Text Generation", 2020. BibTeX: @unpublished{wu2020textgail, author = {Qingyang Wu and Lei Li and Zhou Yu}, title = {TextGAIL: Generative Adversarial Imitation Learning for Text Generation}, year = {2020} }  An Yan, Xin Wang, Jiangtao Feng, Lei Li and William Yang Wang, "Cross-Lingual Vision-Language Navigation", 2020. BibTeX: @unpublished{yan2020cross, author = {An Yan and Xin Wang and Jiangtao Feng and Lei Li and William Yang Wang}, title = {Cross-Lingual Vision-Language Navigation}, year = {2020} }  Yuxuan Song, Ning Miao, Hao Zhou, Lantao Yu, Mingxuan Wang and Lei Li, "Improving Maximum Likelihood Training for Text Generation with Density Ratio Estimation", In The 23rd International Conference on Artificial Intelligence and Statistics (AISTATS), 2020. BibTeX: @inproceedings{song2020improving, author = {Yuxuan Song and Ning Miao and Hao Zhou and Lantao Yu and Mingxuan Wang and Lei Li}, title = {Improving Maximum Likelihood Training for Text Generation with Density Ratio Estimation}, booktitle = {The 23rd International Conference on Artificial Intelligence and Statistics (AISTATS)}, year = {2020} }  Xinlong Wang, Tao Kong, Chunhua Shen, Yuning Jiang and Lei Li, "SOLO: Segmenting Objects by Locations", In The European Conference on Computer Vision (ECCV), 2020. 
Abstract: We present a new, embarrassingly simple approach to instance segmentation in images. Compared to many other dense prediction tasks, e.g., semantic segmentation, it is the arbitrary number of instances that have made instance segmentation much more challenging. In order to predict a mask for each instance, mainstream approaches either follow the 'detect-then-segment' strategy as used by Mask R-CNN, or predict category masks first then use clustering techniques to group pixels into individual instances. We view the task of instance segmentation from a completely new perspective by introducing the notion of "instance categories", which assigns categories to each pixel within an instance according to the instance's location and size, thus nicely converting instance mask segmentation into a classification-solvable problem. Now instance segmentation is decomposed into two classification tasks. We demonstrate a much simpler and flexible instance segmentation framework with strong performance, achieving on par accuracy with Mask R-CNN and outperforming recent single-shot instance segmenters in accuracy. We hope that this very simple and strong framework can serve as a baseline for many instance-level recognition tasks besides instance segmentation. BibTeX: @inproceedings{wang2020solo, author = {Xinlong Wang and Tao Kong and Chunhua Shen and Yuning Jiang and Lei Li}, title = {SOLO: Segmenting Objects by Locations}, booktitle = {The European Conference on Computer Vision (ECCV)}, year = {2020}, url = {https://arxiv.org/abs/1912.04488} }  Wenxian Shi, Hao Zhou, Ning Miao and Lei Li, "Dispersing Exponential Family Mixture VAEs for Interpretable Text Generation", In Proceedings of the 37th International Conference on Machine learning (ICML), 2020. 
BibTeX: @inproceedings{shi2020dispersed, author = {Wenxian Shi and Hao Zhou and Ning Miao and Lei Li}, title = {Dispersing Exponential Family Mixture VAEs for Interpretable Text Generation}, booktitle = {Proceedings of the 37th International Conference on Machine learning (ICML)}, year = {2020} }  Dongyu Ru, Zhenghui Wang, Lin Qiu, Hao Zhou, Lei Li, Weinan Zhang and Yong Yu, "QuAChIE: Question Answering based Chinese Information Extraction System", In the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR) - System Demonstrations, 2020. BibTeX: @inproceedings{ru2020quachie, author = {Dongyu Ru and Zhenghui Wang and Lin Qiu and Hao Zhou and Lei Li and Weinan Zhang and Yong Yu}, title = {QuAChIE: Question Answering based Chinese Information Extraction System}, booktitle = {the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval (SIGIR) - System Demonstrations}, year = {2020} }  Ning Miao, Yuxuan Song, Hao Zhou and Lei Li, "Do you have the right scissors? Tailoring Pre-trained Language Models via Monte-Carlo Methods", In the 58th Annual Meeting of the Association for Computational Linguistics (ACL) - short papers, 2020. Abstract: It has been a common approach to pre-train a language model on a large corpus and fine-tune it on task-specific data. In practice, we observe that fine-tuning a pre-trained model on a small dataset may lead to over- and/or under-estimation problem. In this paper, we propose MC-Tailor, a novel method to alleviate the above issue in text generation tasks by truncating and transferring the probability mass from over-estimated regions to under-estimated ones. Experiments on a variety of text generation datasets show that MC-Tailor consistently and significantly outperforms the fine-tuning approach. Our code is available at https://github.com/NingMiao/MC-tailor. 
BibTeX: @inproceedings{miao2020do, author = {Ning Miao and Yuxuan Song and Hao Zhou and Lei Li}, title = {Do you have the right scissors? Tailoring Pre-trained Language Models via Monte-Carlo Methods}, booktitle = {the 58th Annual Meeting of the Association for Computational Linguistics (ACL) - short papers}, year = {2020} }  Runxin Xu, Jun Cao, Mingxuan Wang, Jiaze Chen, Hao Zhou, Ying Zeng, Yuping Wang, Li Chen, Xiang Yin, Xijin Zhang, Songcheng Jiang, Yuxuan Wang and Lei Li, "Xiaomingbot: A Multilingual Robot News Reporter", In the 58th Annual Meeting of the Association for Computational Linguistics (ACL): System Demonstrations, 2020. Abstract: This paper proposes the building of Xiaomingbot, an intelligent, multilingual and multi-modal software robot equipped with four integral capabilities: news generation, news translation, news reading and avatar animation. Its system summarizes Chinese news that it automatically generates from data tables. Next, it translates the summary or the full article into multiple languages, and reads the multilingual rendition through synthesized speech. Notably, Xiaomingbot utilizes a voice cloning technology to synthesize the speech trained from a real person’s voice data in one input language. The proposed system enjoys several merits: it has an animated avatar, and is able to generate and read multilingual news. Since it was put into practice, Xiaomingbot has written over 600,000 articles, and gained over 150,000 followers on social media platforms. 
BibTeX: @inproceedings{xu2020xiaomingbot, author = {Runxin Xu and Jun Cao and Mingxuan Wang and Jiaze Chen and Hao Zhou and Ying Zeng and Yuping Wang and Li Chen and Xiang Yin and Xijin Zhang and Songcheng Jiang and Yuxuan Wang and Lei Li}, title = {Xiaomingbot: A Multilingual Robot News Reporter}, booktitle = {the 58th Annual Meeting of the Association for Computational Linguistics (ACL): System Demonstrations}, year = {2020}, url = {https://xiaomingbot.github.io} }  Xinyu Hua, Lei Li, Lifeng Hua and Lu Wang, "XREF: Entity Linking for Chinese News Comments with Supplementary Article Reference", In Automated Knowledge Base Construction (AKBC), 2020. Abstract: Automatic identification of mentioned entities in social media posts facilitates quick digestion of trending topics and popular opinions. Nonetheless, this remains a challenging task due to limited context and diverse name variations. In this paper, we study the problem of entity linking for Chinese news comments given mentions’ spans. We hypothesize that comments often refer to entities in the corresponding news article, as well as topics involving the entities. We therefore propose a novel model, XREF, that leverages attention mechanisms to (1) pinpoint relevant context within comments, and (2) detect supporting entities from the news article. To improve training, we make two contributions: (a) we propose a supervised attention loss in addition to the standard cross entropy, and (b) we develop a weakly supervised training scheme to utilize the large-scale unlabeled corpus. Two new datasets in entertainment and product domains are collected and annotated for experiments. Our proposed method outperforms previous methods on both datasets. 
BibTeX: @inproceedings{hua2020xref, author = {Xinyu Hua and Lei Li and Lifeng Hua and Lu Wang}, title = {XREF: Entity Linking for Chinese News Comments with Supplementary Article Reference}, booktitle = {Automated Knowledge Base Construction (AKBC)}, year = {2020}, url = {https://xinyuhua.github.io/Resources/akbc20/} }  Fei Wu, Cewu Lu, Mingjie Zhu, Hao Chen, Jun Zhu, Kai Yu, Lei Li, Ming Li, Qianfeng Chen, Xi Li, Xudong Cao, Zhongyuan Wang, Zhengjun Zha, Yueting Zhuang and Yunhe Pan, "Towards a new generation of artificial intelligence in China", Nature Machine Intelligence, Volume 2, pp. 312-316., 2020. BibTeX: @article{wu2020towards, author = {Wu, Fei and Lu, Cewu and Zhu, Mingjie and Chen, Hao and Zhu, Jun and Yu, Kai and Li, Lei and Li, Ming and Chen, Qianfeng and Li, Xi and Cao, Xudong and Wang, Zhongyuan and Zha, Zhengjun and Zhuang, Yueting and Pan, Yunhe}, title = {Towards a new generation of artificial intelligence in China}, journal = {Nature Machine Intelligence}, year = {2020}, volume = {2}, pages = {312-316}, doi = {https://doi.org/10.1038/s42256-020-0183-4} }  Rong Ye, Wenxian Shi, Hao Zhou, Zhongyu Wei and Lei Li, "Variational Template Machine for Data-to-Text Generation", In International Conference on Learning Representations (ICLR), 2020. Abstract: How to generate descriptions from structured data organized in tables? Existing approaches using neural encoder-decoder models often suffer from lacking diversity. We claim that an open set of templates is crucial for enriching the phrase constructions and realizing varied generations. Learning such templates is prohibitive since it often requires a large paired (table, description) corpus, which is seldom available. This paper explores the problem of automatically learning reusable "templates" from paired and non-paired data. We propose the variational template machine (VTM), a novel method to generate text descriptions from data tables. 
Our contributions include: a) we carefully devise a specific model architecture and losses to explicitly disentangle text template and semantic content information, in the latent spaces, and b) we utilize both small parallel data and large raw text without aligned tables to enrich the template learning. Experiments on datasets from a variety of different domains show that VTM is able to generate more diversely while keeping a good fluency and quality. BibTeX: @inproceedings{ye2020variational, author = {Rong Ye and Wenxian Shi and Hao Zhou and Zhongyu Wei and Lei Li}, title = {Variational Template Machine for Data-to-Text Generation}, booktitle = {International Conference on Learning Representations (ICLR)}, year = {2020}, url = {https://openreview.net/forum?id=HkejNgBtPB} }  Zaixiang Zheng, Hao Zhou, Shujian Huang, Lei Li, Xinyu Dai and Jiajun Chen, "Mirror Generative Models for Neural Machine Translation", In International Conference on Learning Representations (ICLR), 2020. Abstract: Training neural machine translation models (NMT) requires a large amount of parallel corpus, which is scarce for many language pairs. However, raw non-parallel corpora are often easy to obtain. Existing approaches have not exploited the full potential of non-parallel bilingual data either in training or decoding. In this paper, we propose the mirror-generative NMT (MGNMT), a single unified architecture that simultaneously integrates the source to target translation model, the target to source translation model, and two language models. Both translation models and language models share the same latent semantic space, therefore both translation directions can learn from non-parallel data more effectively. Besides, the translation models and language models can collaborate together during decoding. Our experiments show that the proposed MGNMT consistently outperforms existing approaches in a variety of scenarios and language pairs, including resource-rich and low-resource languages. 
BibTeX: @inproceedings{zheng2020mirror, author = {Zaixiang Zheng and Hao Zhou and Shujian Huang and Lei Li and Xinyu Dai and Jiajun Chen}, title = {Mirror Generative Models for Neural Machine Translation}, booktitle = {International Conference on Learning Representations (ICLR)}, year = {2020}, url = {https://openreview.net/forum?id=HkxQRTNYPH} }  Xunpeng Huang, Xianfeng Liang, Zhengyang Liu, Yue Yu and Lei Li, "SPAN: A Stochastic Projected Approximate Newton Method", In the 34th AAAI Conference on Artificial Intelligence (AAAI), 2020. Abstract: Second-order optimization methods have desirable convergence properties. However, the exact Newton method requires expensive computation for the Hessian and its inverse. In this paper, we propose SPAN, a novel approximate and fast Newton method. SPAN computes the inverse of the Hessian matrix via low-rank approximation and stochastic Hessian-vector products. Our experiments on multiple benchmark datasets demonstrate that SPAN outperforms existing first-order and second-order optimization methods in terms of the convergence wall-clock time. Furthermore, we provide a theoretical analysis of the per-iteration complexity, the approximation error, and the convergence rate. Both the theoretical analysis and experimental results show that our proposed method achieves a better trade-off between the convergence rate and the per-iteration efficiency. BibTeX: @inproceedings{huang2020span, author = {Xunpeng Huang and Xianfeng Liang and Zhengyang Liu and Yue Yu and Lei Li}, title = {SPAN: A Stochastic Projected Approximate Newton Method}, booktitle = {the 34th AAAI Conference on Artificial Intelligence (AAAI)}, year = {2020} }  Xinlong Wang, Wei Yin, Tao Kong, Yuning Jiang, Lei Li and Chunhua Shen, "Task-Aware Monocular Depth Estimation for 3D Object Detection", In the 34th AAAI Conference on Artificial Intelligence (AAAI), 2020. 
Abstract: Monocular depth estimation enables 3D perception from a single 2D image, thus attracting much research attention for years. Almost all methods treat foreground and background regions ("things and stuff") in an image equally. However, not all pixels are equal. Depth of foreground objects plays a crucial role in 3D object recognition and localization. To date how to boost the depth prediction accuracy of foreground objects is rarely discussed. In this paper, we first analyse the data distributions and interaction of foreground and background, then propose the foreground-background separated monocular depth estimation (ForeSeE) method, to estimate the foreground depth and background depth using separate optimization objectives and depth decoders. Our method significantly improves the depth estimation performance on foreground objects. Applying ForeSeE to 3D object detection, we achieve 7.5 AP gains and set new state-of-the-art results among other monocular methods. BibTeX: @inproceedings{wang2020task, author = {Xinlong Wang and Wei Yin and Tao Kong and Yuning Jiang and Lei Li and Chunhua Shen}, title = {Task-Aware Monocular Depth Estimation for 3D Object Detection}, booktitle = {the 34th AAAI Conference on Artificial Intelligence (AAAI)}, year = {2020} }  Qingyang Wu, Lei Li, Hao Zhou, Ying Zeng and Zhou Yu, "Importance-Aware Learning for Neural Headline Editing", In the 34th AAAI Conference on Artificial Intelligence (AAAI), 2020. Abstract: Many social media news writers are not professionally trained. Therefore, social media platforms have to hire professional editors to adjust amateur headlines to attract more readers. We propose to automate this headline editing process through neural network models to provide more immediate writing support for these social media news writers. To train such a neural headline editing model, we collected a dataset which contains articles with original headlines and professionally edited headlines. 
However, it is expensive to collect a large number of professionally edited headlines. To solve this low-resource problem, we design an encoder-decoder model which leverages large scale pre-trained language models. We further improve the pre-trained model's quality by introducing a headline generation task as an intermediate task before the headline editing task. Also, we propose Self Importance-Aware (SIA) loss to address the different levels of editing in the dataset by down-weighting the importance of easily classified tokens and sentences. With the help of Pre-training, Adaptation, and SIA, the model learns to generate headlines in the professional editor's style. Experimental results show that our method significantly improves the quality of headline editing comparing against previous methods. BibTeX: @inproceedings{wu2020importance, author = {Qingyang Wu and Lei Li and Hao Zhou and Ying Zeng and Zhou Yu}, title = {Importance-Aware Learning for Neural Headline Editing}, booktitle = {the 34th AAAI Conference on Artificial Intelligence (AAAI)}, year = {2020}, url = {https://arxiv.org/abs/1912.01114} }  Jiacheng Yang, Mingxuan Wang, Hao Zhou, Chengqi Zhao, Weinan Zhang, Yong Yu and Lei Li, "Towards Making the Most of BERT in Neural Machine Translation", In the 34th AAAI Conference on Artificial Intelligence (AAAI), 2020. Abstract: GPT-2 and BERT demonstrate the effectiveness of using pre-trained language models (LMs) on various natural language processing tasks. 
However, LM fine-tuning often suffers from catastrophic forgetting when applied to resource-rich tasks. In this work, we introduce a concerted training framework (CTNMT) that is the key to integrate the pre-trained LMs to neural machine translation (NMT). Our proposed CTNMT consists of three techniques: a) asymptotic distillation to ensure that the NMT model can retain the previous pre-trained knowledge; b) a dynamic switching gate to avoid catastrophic forgetting of pre-trained knowledge; and c) a strategy to adjust the learning paces according to a scheduled policy. Our experiments in machine translation show CTNMT gains of up to 3 BLEU score on the WMT14 English-German language pair which even surpasses the previous state-of-the-art pre-training aided NMT by 1.4 BLEU score. While for the large WMT14 English-French task with 40 millions of sentence-pairs, our base model still significantly improves upon the state-of-the-art Transformer big model by more than 1 BLEU score. BibTeX: @inproceedings{yang2020towards, author = {Jiacheng Yang and Mingxuan Wang and Hao Zhou and Chengqi Zhao and Weinan Zhang and Yong Yu and Lei Li}, title = {Towards Making the Most of BERT in Neural Machine Translation}, booktitle = {the 34th AAAI Conference on Artificial Intelligence (AAAI)}, year = {2020} }  Ning Miao, Hao Zhou, Chengqi Zhao, Wenxian Shi and Lei Li, "Kernelized Bayesian Softmax for Text Generation", In the 33rd Conference on Neural Information Processing Systems (NeurIPS), 2019. 
BibTeX: @inproceedings{miao2019kernelized, author = {Miao, Ning and Zhou, Hao and Zhao, Chengqi and Shi, Wenxian and Li, Lei}, title = {Kernelized Bayesian Softmax for Text Generation}, booktitle = {the 33rd Conference on Neural Information Processing Systems (NeurIPS)}, year = {2019} }  Zhichen Zhao, Lei Li, Bowen Zhang, Meng Wang, Yuning Jiang, Li Xu, Fengkun Wang and Weiying Ma, "What You Look Matters: Offline Evaluation of Advertising Creatives for Cold Start Problem", In the 28th ACM International Conference on Information and Knowledge Management (CIKM), 2019. Abstract: Modern online-auction-based advertising systems utilize user and item features to automatically place ads. In order to train a model to rank the most profitable ads, new ad creatives have to be placed online for hours to receive sufficient user-click data. This corresponds to the cold-start stage. Random strategy lead to inefficiency and inferior selections of potential ads. In this paper, we analyze the effectiveness of content-based selection during the cold-start stage. Specifically, we propose Pre Evaluation of Ad Creative Model (PEAC), a novel method to evaluate and select ad creatives offline before being placed online. Our proposed PEAC utilizes the automatically extracted deep feature from ad content to predict and rank their potential online placement performance. It does not rely on any user-click data, which is scarce during the cold-starting phase. A large-scale system based on our method has been deployed in a real online advertising platform. The online A/B testing shows the ads system with PEAC pre-ranking obtains significant improvement in revenue gain compared to the prior system. Furthermore, we provide detailed analyses on what the model learned, which gives further suggestions to improve ad creative design. 
BibTeX: @inproceedings{zhao2019what, author = {Zhao, Zhichen and Li, Lei and Zhang, Bowen and Wang, Meng and Jiang, Yuning and Xu, Li and Wang, Fengkun and Ma, Weiying}, title = {What You Look Matters: Offline Evaluation of Advertising Creatives for Cold Start Problem}, booktitle = {the 28th ACM International Conference on Information and Knowledge Management (CIKM)}, year = {2019} }  Mingxuan Wang, Jun Xie, Zhixing Tan, Jinsong Su, Deyi Xiong and Lei Li, "Towards Linear Time Neural Machine Translation with Capsule Networks", In the Conference on Empirical Methods in Natural Language Processing (EMNLP), 2019. Abstract: In this study, we first investigate a novel capsule network with dynamic routing for linear time Neural Machine Translation (NMT), referred to as CAPSNMT. CAPSNMT uses an aggregation mechanism to map the source sentence into a matrix with pre-determined size, and then applies a deep LSTM network to decode the target sequence from the source representation. Unlike the previous work (Sutskever et al., 2014) to store the source sentence with a passive and bottom-up way, the dynamic routing policy encodes the source sentence with an iterative process to decide the credit attribution between nodes from lower and higher layers. CAPSNMT has two core properties: it runs in time that is linear in the length of the sequences and provides a more flexible way to aggregate the part-whole information of the source sentence. On WMT14 English-German task and a larger WMT14 English-French task, CAPSNMT achieves comparable results with the Transformer system. We also devise new hybrid architectures intended to combine the strength of CAPSNMT and the RNMT model. Our hybrid models obtain state-of-the-art results on both benchmark datasets. 
To the best of our knowledge, this is the first work that capsule networks have been empirically investigated for sequence to sequence problems. BibTeX: @inproceedings{wang2019towards, author = {Wang, Mingxuan and Xie, Jun and Tan, Zhixing and Su, Jinsong and Xiong, Deyi and Li, Lei}, title = {Towards Linear Time Neural Machine Translation with Capsule Networks}, booktitle = {the Conference on Empirical Methods in Natural Language Processing (EMNLP)}, year = {2019} }  Qing-Yuan Jiang, Yi He, Gen Li, Jian Lin, Lei Li and Wu-Jun Li., "SVD: A Large-Scale Short Video Dataset for Near Duplicate Video Retrieval.", In International Conference on Computer Vision (ICCV), 2019. Abstract: With the explosive growth of video data in real applications, near-duplicate video retrieval (NDVR) has become indispensable and challenging, especially for short videos. However, all existing NDVR datasets are introduced for long videos. Furthermore, most of them are small-scale and lack of diversity due to the high cost of collecting and labeling near-duplicate videos. In this paper, we introduce a large-scale short video dataset, called SVD, for the NDVR task. SVD contains over 500,000 short videos and over 30,000 labeled videos of near-duplicates. We use multiple video mining techniques to construct positive/negative pairs. Furthermore, we design temporal and spatial transformations to mimic user-attack behavior in real applications for constructing more difficult variants of SVD. Experiments show that existing state-of-the-art NDVR methods, including real-value based and hashing based methods, fail to achieve satisfactory performance on this challenging dataset. The release of SVD dataset will foster research and system engineering in the NDVR area. The SVD dataset is available at https://svdbase.github.io. 
BibTeX: @inproceedings{jiang2019svd, author = {Jiang, Qing-Yuan and He, Yi and Li, Gen and Lin, Jian and Li, Lei and Li., Wu-Jun}, title = {SVD: A Large-Scale Short Video Dataset for Near Duplicate Video Retrieval.}, booktitle = {International Conference on Computer Vision (ICCV)}, year = {2019}, url = {https://svdbase.github.io} }  Xin Wang, Jiawei Wu, Junkun Chen, Lei Li, Yuan-Fang Wang and William Yang Wang, "VATEX: A Large-Scale, High-Quality Multilingual Dataset for Video-and-Language Research", In International Conference on Computer Vision (ICCV), 2019. Abstract: We present a new large-scale multilingual video description dataset, VATEX, which contains over 41,250 videos and 825,000 captions in both English and Chinese. Among the captions, there are over 206,000 English-Chinese parallel translation pairs. Compared to the widely-used MSR-VTT dataset, VATEX is multilingual, larger, linguistically complex, and more diverse in terms of both video and natural language descriptions. We also introduce two tasks for video-and-language research based on VATEX: (1) Multilingual Video Captioning, aimed at describing a video in various languages with a compact unified captioning model, and (2) Video-guided Machine Translation, to translate a source language description into the target language using the video information as additional spatiotemporal context. Extensive experiments on the VATEX dataset show that, first, the unified multilingual model can not only produce both English and Chinese descriptions for a video more efficiently, but also offer improved performance over the monolingual models. Furthermore, we demonstrate that the spatiotemporal video context can be effectively utilized to align source and target languages and thus assist machine translation. In the end, we discuss the potentials of using VATEX for other video-and-language research. 
BibTeX: @inproceedings{wang2019vatex, author = {Wang, Xin and Wu, Jiawei and Chen, Junkun and Li, Lei and Wang, Yuan-Fang and Wang, William Yang}, title = {VATEX: A Large-Scale, High-Quality Multilingual Dataset for Video-and-Language Research}, booktitle = {International Conference on Computer Vision (ICCV)}, year = {2019}, url = {https://vatex.org/main/index.html} }  Yao Fu, Hao Zhou, Jiaze Chen and Lei Li, "Rethinking Text Attribute Transfer: A Lexical Analysis", In the 12th International Conference on Natural Language Generation (INLG), 2019. Abstract: Text attribute transfer is modifying certain linguistic attributes (e.g. sentiment, style, authorship, etc.) of a sentence and transforming them from one type to another. In this paper, we aim to analyze and interpret what is changed during the transfer process. We start from the observation that in many existing models and datasets, certain words within a sentence play important roles in determining the sentence attribute class. These words are referred to as the Pivot Words. Based on these pivot words, we propose a lexical analysis framework, the Pivot Analysis, to quantitatively analyze the effects of these words in text attribute classification and transfer. We apply this framework to existing datasets and models, and show that: (1) the pivot words are strong features for the classification of sentence attributes; (2) to change the attribute of a sentence, many datasets only requires to change certain pivot words; (3) consequently, many transfer models only perform the lexical-level modification, while leaving higher-level sentence structures unchanged. Our work provides an in-depth understanding of linguistic attribute transfer and further identifies the future requirements and challenges of this task. 
BibTeX: @inproceedings{fu2019rethinking, author = {Fu, Yao and Zhou, Hao and Chen, Jiaze and Li, Lei}, title = {Rethinking Text Attribute Transfer: A Lexical Analysis}, booktitle = {the 12th International Conference on Natural Language Generation (INLG)}, year = {2019} }  Yunfei Lu, Linyun Yu, Peng Cui, Chengxi Zang, Renzhe Xu, Yihao Liu, Lei Li and Wenwu Zhu, "Uncovering the Co-driven Mechanism of Social and Content Links in User Churn Phenomena", In the 25th SIGKDD Conference on Knowledge Discovery and Data Mining (KDD), New York, NY, USA ACM, 2019. Abstract: Recent years witness the merge of social networks and user-generated content (UGC) platforms. In these new platforms, users establish links to others not only driven by their social relationships in the physical world but also driven by the contents published by others. During this merging process, social networks gradually integrate both social and content links and become unprecedentedly complicated, with the motivation to exploit both the advantages of social viscosity and content attractiveness to reach the best customer retention situation. However, due to the lack of fine-grained data recording such merging phenomena, the co-driven mechanism of social and content links in churn phenomena remains unexplored. How do social and content factors jointly influence customers’ churn? What is the best ratio of social and content links for a user’s retention? Is there a model to capture this co-driven mechanism in users’ churn phenomena? In this paper, we collect a real-world dataset with more than 5.77 million users and 925 million links, with each link being tagged as a social one or a content one. We find that both social and content links have a significant impact on users’ churn and they work jointly as a complicated mixture effect. As a result, we propose a novel survival model, which incorporates both social and content factors, to predict churn probability over time. 
Our model successfully fits the churn distribution in reality and accurately predicts the churn rate of different subpopulations in the future. By analyzing the modeling parameters, we try to strike a balance between social-driven and content-driven links in a user’s social network to reach the lowest churn rate. Our model and findings may have potential implications for the design of future social media. BibTeX: @inproceedings{lu2019uncovering, author = {Lu, Yunfei and Yu, Linyun and Cui, Peng and Zang, Chengxi and Xu, Renzhe and Liu, Yihao and Li, Lei and Zhu, Wenwu}, title = {Uncovering the Co-driven Mechanism of Social and Content Links in User Churn Phenomena}, booktitle = {the 25th SIGKDD Conference on Knowledge Discovery and Data Mining (KDD)}, publisher = {ACM}, year = {2019} }  Zhaoyue Sun, Jiaze Chen, Hao Zhou, Deyu Zhou, Lei Li and Mingmin Jiang, "GraspSnooker: Automatic Chinese Commentary Generation for Snooker Videos", In the 28th International Joint Conference on Artificial Intelligence (IJCAI), pp. 6569-6571., 2019. BibTeX: @inproceedings{sun2019graspsnooker, author = {Sun, Zhaoyue and Chen, Jiaze and Zhou, Hao and Zhou, Deyu and Li, Lei and Jiang, Mingmin}, title = {GraspSnooker: Automatic Chinese Commentary Generation for Snooker Videos}, booktitle = {the 28th International Joint Conference on Artificial Intelligence (IJCAI)}, year = {2019}, pages = {6569--6571}, url = {https://doi.org/10.24963/ijcai.2019/959}, doi = {10.24963/ijcai.2019/959} }  Rongxiang Weng, Hao Zhou, Shujian Huang, Yifan Xia, Lei Li and Jiajun Chen, "Correct-and-Memorize: Learning to Translate from Interactive Revisions", In the 28th International Joint Conference on Artificial Intelligence (IJCAI), 2019. Abstract: State-of-the-art machine translation models are still not on a par with human translators. Previous work takes human interactions into the neural machine translation process to obtain improved results in target languages. 
However, not all model-translation errors are equal — some are critical while others are minor. In the meanwhile, same translation mistakes occur repeatedly in similar context. To solve both issues, we propose CAMIT, a novel method for translating in an interactive environment. Our proposed method works with critical revision instructions, therefore allows human to correct arbitrary words in model-translated sentences. In addition, CAMIT learns from and softly memorizes revision actions based on the context, alleviating the issue of repeating mistakes. Experiments in both ideal and real interactive translation settings demonstrate that our proposed CAMIT enhances machine translation results significantly while requires fewer revision instructions from human compared to previous methods. BibTeX: @inproceedings{weng2019correct, author = {Weng, Rongxiang and Zhou, Hao and Huang, Shujian and Xia, Yifan and Li, Lei and Chen, Jiajun}, title = {Correct-and-Memorize: Learning to Translate from Interactive Revisions}, booktitle = {the 28th International Joint Conference on Artificial Intelligence (IJCAI)}, year = {2019} }  Yu Bao, Hao Zhou, Shujian Huang, Lei Li, Lili Mou, Olga Vechtomova, Xinyu Dai and Jiajun Chen, "Generating Sentences from Disentangled Syntactic and Semantic Spaces", In the 57th Annual Meeting of the Association for Computational Linguistics (ACL), 2019. Abstract: Variational auto-encoders (VAEs) are widely used in natural language generation due to the regularization of the latent space. However, generating sentences from the continuous latent space does not explicitly model the syntactic information. In this paper, we propose to generate sentences from disentangled syntactic and semantic spaces. Our proposed method explicitly models syntactic information in the VAE’s latent space by using the linearized tree sequence, leading to better performance of language generation. 
Additionally, the advantage of sampling in the disentangled syntactic and semantic latent spaces enables us to perform novel applications, such as the unsupervised paraphrase generation and syntax-transfer generation. Experimental results show that our proposed model achieves similar or better performance in various tasks, compared with state-of-the-art related work. BibTeX: @inproceedings{bao2019generating, author = {Bao, Yu and Zhou, Hao and Huang, Shujian and Li, Lei and Mou, Lili and Vechtomova, Olga and Dai, Xinyu and Chen, Jiajun}, title = {Generating Sentences from Disentangled Syntactic and Semantic Spaces}, booktitle = {the 57th Annual Meeting of the Association for Computational Linguistics (ACL)}, year = {2019} }  Lin Qiu, Yunxuan Xiao, Yanru Qu, Hao Zhou, Lei Li, Weinan Zhang and Yong Yu, "Dynamically Fused Graph Network for Multi-hop Reasoning", In the 57th Annual Meeting of the Association for Computational Linguistics (ACL), 2019. Abstract: Text-based question answering (TBQA) has been studied extensively in recent years. Most existing approaches focus on finding the answer to a question within a single paragraph. However, many difficult questions require multiple supporting evidence from scattered text across two or more documents. In this paper, we propose the Dynamically Fused Graph Network (DFGN), a novel method to answer those questions requiring multiple scattered evidence and reasoning over them. Inspired by human’s step-by-step reasoning behavior, DFGN includes a dynamic fusion layer that starts from the entities mentioned in the given query, explores along the entity graph dynamically built from the text, and gradually finds relevant supporting entities from the given documents. We evaluate DFGN on HotpotQA, a public TBQA dataset requiring multi-hop reasoning. DFGN achieves competitive results on the public board. Furthermore, our analysis shows DFGN could produce interpretable reasoning chains. 
BibTeX: @inproceedings{qiu2019dynamically, author = {Qiu, Lin and Xiao, Yunxuan and Qu, Yanru and Zhou, Hao and Li, Lei and Zhang, Weinan and Yu, Yong}, title = {Dynamically Fused Graph Network for Multi-hop Reasoning}, booktitle = {the 57th Annual Meeting of the Association for Computational Linguistics (ACL)}, year = {2019} }  Huangzhao Zhang, Ning Miao, Hao Zhou and Lei Li, "Generating Fluent Adversarial Examples for Natural Languages", In the 57th Annual Meeting of the Association for Computational Linguistics (ACL) - short papers, 2019. Abstract: Efficiently building an adversarial attacker for natural language processing (NLP) tasks is a real challenge. Firstly, as the sentence space is discrete, it is difficult to make small perturbations along the direction of gradients. Secondly, the fluency of the generated examples can not be guaranteed. In this paper, we propose MHA, which addresses both problems by performing Metropolis-Hastings sampling, whose proposal is designed with the guidance of gradients. Experiments on IMDB and SNLI show that our proposed MHA outperforms the baseline model on attacking capability. Adversarial training with MHA also leads to better robustness and performance. BibTeX: @inproceedings{zhang2019generating, author = {Zhang, Huangzhao and Miao, Ning and Zhou, Hao and Li, Lei}, title = {Generating Fluent Adversarial Examples for Natural Languages}, booktitle = {the 57th Annual Meeting of the Association for Computational Linguistics (ACL) - short papers}, year = {2019} }  Hao Wu, Jiayuan Mao, Yufeng Zhang, Weiwei Sun, Yuning Jiang, Lei Li and Wei-Ying Ma, "Unified Visual-Semantic Embeddings: Bridging Vision and Language with Structured Meaning Representations", In The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2019. Abstract: We propose Unified Visual-Semantic Embeddings (VSE) for learning a joint space for scene representation and textual semantics. 
It unifies the embeddings of concepts at different levels: objects, attributes, relations and full scenes. We view the sentential semantics as a combination of different semantic components such as object or relational descriptors, and align their embeddings with different regions of a scene. A contrastive learning approach is proposed for the effective learning of such fine-grained alignment from only image-caption pairs. We also present a simple yet effective approach that enforces the coverage of caption embeddings on the semantic components that appear in the sentence. We demonstrate that the Unified VSE outperforms other baselines on cross-modal retrieval tasks and the enforcement of the semantic coverage improves models’ robustness in defending text-domain adversarial attacks. Moreover, such robustness empowers the use of visual cues to accurately resolve word dependencies in novel sentences. BibTeX: @inproceedings{wu2019unified, author = {Wu, Hao and Mao, Jiayuan and Zhang, Yufeng and Sun, Weiwei and Jiang, Yuning and Li, Lei and Ma, Wei-Ying}, title = {Unified Visual-Semantic Embeddings: Bridging Vision and Language with Structured Meaning Representations}, booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, year = {2019} }  Ning Miao, Hao Zhou, Lili Mou, Rui Yan and Lei Li, "CGMH: Constrained Sentence Generation by Metropolis-Hastings Sampling", In the 33rd AAAI Conference on Artificial Intelligence (AAAI), 2019. Abstract: In real-world applications of natural language generation, there are often constraints on the target sentences in addition to fluency and naturalness requirements. Existing language generation techniques are usually based on recurrent neural networks (RNNs). However, it is non-trivial to impose constraints on RNNs while maintaining generation quality, since RNNs generate sentences sequentially (or with beam search) from the first word to the last. 
In this paper, we propose CGMH, a novel approach using Metropolis-Hastings sampling for constrained sentence generation. CGMH allows complicated constraints such as the occurrence of multiple keywords in the target sentences, which cannot be handled in traditional RNN-based approaches. Moreover, CGMH works in the inference stage, and does not require parallel corpora for training. We evaluate our method on a variety of tasks, including keywords-to-sentence generation, unsupervised sentence paraphrasing, and unsupervised sentence error correction. CGMH achieves high performance compared with previous supervised methods for sentence generation. Our code is released at https://github.com/NingMiao/CGMH BibTeX: @inproceedings{miao2019cgmh, author = {Miao, Ning and Zhou, Hao and Mou, Lili and Yan, Rui and Li, Lei}, title = {CGMH: Constrained Sentence Generation by Metropolis-Hastings Sampling}, booktitle = {the 33rd AAAI Conference on Artificial Intelligence (AAAI)}, year = {2019}, url = {http://arxiv.org/abs/1811.10996} }  Wei Cao, Dong Wang, Jian Li, Hao Zhou, Yitan Li and Lei Li, "BRITS: Bidirectional Recurrent Imputation for Time Series", In the 32nd Conference on Neural Information Processing Systems (NeurIPS), 2018. Abstract: Time series are widely used as signals in many classification/regression tasks. It is ubiquitous that time series contains many missing values. Given multiple correlated time series data, how to fill in missing values and to predict their class labels? Existing imputation methods often impose strong assumptions of the underlying data generating process, such as linear dynamics in the state space. In this paper, we propose BRITS, a novel method based on recurrent neural networks for missing value imputation in time series data. Our proposed method directly learns the missing values in a bidirectional recurrent dynamical system, without any specific assumption. 
The imputed values are treated as variables of RNN graph and can be effectively updated during the backpropagation. BRITS has three advantages: (a) it can handle multiple correlated missing values in time series; (b) it generalizes to time series with nonlinear dynamics underlying; (c) it provides a data-driven imputation procedure and applies to general settings with missing data. We evaluate our model on three real-world datasets, including an air quality dataset, a health-care data, and a localization data for human activity. Experiments show that our model outperforms the state-of-the-art methods in both imputation and classification/regression accuracies. BibTeX: @inproceedings{cao2018brits, author = {Cao, Wei and Wang, Dong and Li, Jian and Zhou, Hao and Li, Yitan and Li, Lei}, title = {BRITS: Bidirectional Recurrent Imputation for Time Series}, booktitle = {the 32nd Conference on Neural Information Processing Systems (NeurIPS)}, year = {2018}, url = {https://arxiv.org/abs/1805.10572} }  Haoyue Shi, Hao Zhou, Jiaze Chen and Lei Li, "On Tree-Based Neural Sentence Modeling", In Conference on Empirical Methods in Natural Language Processing (EMNLP), 2018. Abstract: Neural networks with tree-based sentence encoders have shown better results on many downstream tasks. Most of existing tree-based encoders adopt syntactic parsing trees as the explicit structure prior. To study the effectiveness of different tree structures, we replace the parsing trees with trivial trees (i.e., binary balanced tree, left-branching tree and right-branching tree) in the encoders. Though trivial trees contain no syntactic information, those encoders get competitive or even better results on all of the ten downstream tasks we investigated. This surprising result indicates that explicit syntax guidance may not be the main contributor to the superior performances of tree-based neural sentence modeling. 
Further analysis show that tree modeling gives better results when crucial words are closer to the final representation. Additional experiments give more clues on how to design an effective tree-based encoder. BibTeX: @inproceedings{shi2018tree, author = {Shi, Haoyue and Zhou, Hao and Chen, Jiaze and Li, Lei}, title = {On Tree-Based Neural Sentence Modeling}, booktitle = {Conference on Empirical Methods in Natural Language Processing (EMNLP)}, year = {2018}, url = {https://arxiv.org/abs/1808.09644} }  Gen Li, Shikun Xu, Xiang Liu, Lei Li and Changhu Wang, "Jersey Number Recognition with Semi-Supervised Spatial Transformer Network", In IEEE Conference on Computer Vision and Pattern Recognition workshops, Computer Vision in Sports, pp. 1864-1871., 2018. Abstract: It is still a challenging task to recognize the jersey number of players on the court in soccer match videos, as the jersey numbers are very small in the object detection task and annotated data are not easy to collect. Based on the object detection results of all the players on the court, a CNN model is first introduced to classify these numbers on the detected players’ images. To localize the jersey number more precisely without involving another digit detector and extra consumption, we then improve the former network to an end-to-end framework by fusing with the spatial transformer network (STN). To further improve the accuracy, we bring extra supervision to STN and upgrade the model to a semi-supervised multi-task learning system, by labeling a small portion of the number areas in the dataset by quadrangle. Extensive experiments illustrate the effectiveness of the proposed framework. 
BibTeX: @inproceedings{li2018jersey, author = {Li, Gen and Xu, Shikun and Liu, Xiang and Li, Lei and Wang, Changhu}, title = {Jersey Number Recognition with Semi-Supervised Spatial Transformer Network}, booktitle = {IEEE Conference on Computer Vision and Pattern Recognition workshops, Computer Vision in Sports}, year = {2018}, pages = {1864--1871} }  Jiawei Wu, Lei Li and William Yang Wang, "Reinforced Co-Training", In Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT), New Orleans, Louisiana, pp. 1252-1262. Association for Computational Linguistics, 2018. Abstract: Co-training is a popular semi-supervised learning framework to utilize a large amount of unlabeled data in addition to a small labeled set. Co-training methods exploit predicted labels on the unlabeled data and select samples based on prediction confidence to augment the training. However, the selection of samples in existing co-training methods is based on a predetermined policy, which ignores the sampling bias between the unlabeled and the labeled subsets, and fails to explore the data space. In this paper, we propose a novel method, Reinforced Co-Training, to select high-quality unlabeled samples to better co-train on. More specifically, our approach uses Q-learning to learn a data selection policy with a small labeled dataset, and then exploits this policy to train the co-training classifiers automatically. Experimental results on clickbait detection and generic text classification tasks demonstrate that our proposed method can obtain more accurate text classification results. 
BibTeX: @inproceedings{wu2018reinforced, author = {Wu, Jiawei and Li, Lei and Wang, William Yang}, title = {Reinforced Co-Training}, booktitle = {Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (NAACL-HLT)}, publisher = {Association for Computational Linguistics}, year = {2018}, pages = {1252--1262}, url = {http://arxiv.org/abs/1804.06035} }  Yusuf B. Erol, Yi Wu, Lei Li and Stuart Russell, "A Nearly-Black-Box Online Algorithm for Joint Parameter and State Estimation in Temporal Models", In the 31st AAAI Conference on Artificial Intelligence (AAAI), 2017. Abstract: Online joint parameter and state estimation is a core problem for temporal models. Most existing methods are either restricted to a particular class of models (e.g., the Storvik filter) or computationally expensive (e.g., particle MCMC). We propose a novel nearly-black-box algorithm, the Assumed Parameter Filter (APF), a hybrid of particle filtering for state variables and assumed density filtering for parameter variables. It has the following advantages: (a) it is online and computationally efficient; (b) it is applicable to both discrete and continuous parameter spaces with arbitrary transition dynamics. On a variety of toy and real models, APF generates more accurate results within a fixed computation budget compared to several standard algorithms from the literature. BibTeX: @inproceedings{erol2017nearly, author = {Erol, Yusuf B. and Wu, Yi and Li, Lei and Russell, Stuart}, title = {A Nearly-Black-Box Online Algorithm for Joint Parameter and State Estimation in Temporal Models}, booktitle = {the 31st AAAI Conference on Artificial Intelligence (AAAI)}, year = {2017} }  Yasuko Matsubara, Yasushi Sakurai, B. Aditya Prakash, Lei Li and Christos Faloutsos, "Non-linear Dynamics of Information Diffusion in Social Networks", ACM Transactions on the Web, Volume 11(1), 2017. 
Abstract: The recent explosion in the adoption of search engines and new media such as blogs and Twitter have facilitated the faster propagation of news and rumors. How quickly does a piece of news spread over these media? How does its popularity diminish over time? Does the rising and falling pattern follow a simple universal law? In this paper, we propose SPIKEM, a concise yet flexible analytical model of the rise and fall patterns of information diffusion. Our model has the following advantages: (a) unification power: it explains earlier empirical observations and generalizes theoretical models including the SI and SIR models. We provide the threshold of the take-off vs. die-out conditions for SPIKEM, and discuss the generality of our model, by applying it to an arbitrary graph topology; (b) practicality: it matches the observed behavior of diverse sets of real data; (c) parsimony: it requires only a handful of parameters; and (d) usefulness: it makes it possible to perform analytic tasks such as forecasting, spotting anomalies, and interpretation by reverse engineering the system parameters of interest (e.g. quality of news, number of interested bloggers, etc.). We also introduce an efficient and effective algorithm for the real-time monitoring of information diffusion, namely, SPIKESTREAM, which identifies multiple diffusion patterns in a large collection of online event streams. Extensive experiments on real datasets demonstrate that SPIKEM accurately and succinctly describes all the patterns of the rise-and-fall spikes in social networks. BibTeX: @article{matsubara2017non, author = {Matsubara, Yasuko and Sakurai, Yasushi and Prakash, B. Aditya and Li, Lei and Faloutsos, Christos}, title = {Non-linear Dynamics of Information Diffusion in Social Networks}, journal = {ACM Transactions on the Web}, year = {2017}, volume = {11}, number = {1} }  Yi Wu, Lei Li, Stuart J. 
Russell and Rastislav Bodik, "Swift: Compiled Inference for Probabilistic Programming Languages", In 25th International Joint Conference on Artificial Intelligence (IJCAI), 2016. Abstract: A probabilistic program defines a probability measure over its semantic structures. One common goal of probabilistic programming languages (PPLs) is to compute posterior probabilities for arbitrary models and queries, given observed evidence, using a generic inference engine. Most PPL inference engines—even the compiled ones—incur significant runtime interpretation overhead, especially for contingent and open-universe models. This paper describes Swift, a compiler for the BLOG PPL. Swift-generated code incorporates optimizations that eliminate interpretation overhead, maintain dynamic dependencies efficiently, and handle memory management for possible worlds of varying sizes. Experiments comparing Swift with other PPL engines on a variety of inference problems demonstrate speedups ranging from 12x to 326x. BibTeX: @inproceedings{wu2016swift, author = {Wu, Yi and Li, Lei and Russell, Stuart J. and Bodik, Rastislav}, title = {Swift: Compiled Inference for Probabilistic Programming Languages}, booktitle = {25th International Joint Conference on Artificial Intelligence (IJCAI)}, year = {2016} }  Zihang Dai, Lei Li and Wei Xu, "CFO: Conditional Focused Neural Question Answering with Large-scale Knowledge Bases", In the 54th Annual Meeting of the Association for Computational Linguistics (ACL), 2016. Abstract: How can we enable computers to automatically answer questions like “Who created the character Harry Potter”? Carefully built knowledge bases provide rich sources of facts. However, it remains a challenge to answer factoid questions raised in natural language due to numerous expressions of one question. In particular, we focus on the most common questions — ones that can be answered with a single fact in the knowledge base. 
We propose CFO, a Conditional Focused neural-network-based approach to answering factoid questions with knowledge bases. Our approach first zooms in a question to find more probable candidate subject mentions, and infers the final answers with a unified conditional probabilistic framework. Powered by deep recurrent neural networks and neural embeddings, our proposed CFO achieves an accuracy of 75.7% on a dataset of 108k questions - the largest public one to date. It outperforms the current state of the art by an absolute margin of 11.8%. BibTeX: @inproceedings{dai2016cfo, author = {Dai, Zihang and Li, Lei and Xu, Wei}, title = {CFO: Conditional Focused Neural Question Answering with Large-scale Knowledge Bases}, booktitle = {the 54th Annual Meeting of the Association for Computational Linguistics (ACL)}, year = {2016} }  Zefu Lu, Lei Li and Wei Xu, "Twisted Recurrent Network for Named Entity Recognition", 2015. BibTeX: @misc{lu2015twisted, author = {Lu, Zefu and Li, Lei and Xu, Wei}, title = {Twisted Recurrent Network for Named Entity Recognition}, howpublished = {Bay Area Machine Learning Symposium}, year = {2015} }  Hieu Pham, Zihang Dai and Lei Li, "On Optimization Algorithms for Recurrent Networks with Long Short-Term Memory", 2015. BibTeX: @misc{pham2015optimization, author = {Pham, Hieu and Dai, Zihang and Li, Lei}, title = {On Optimization Algorithms for Recurrent Networks with Long Short-Term Memory}, howpublished = {Bay Area Machine Learning Symposium}, year = {2015} }  Yi Wu, Lei Li and Stuart J. Russell, "BFiT: From Possible-World Semantics to Random-Evaluation Semantics in Open Universe", In Neural Information Processing Systems, Probabilistic Programming workshop, 2014. Abstract: In recent years, several probabilistic programming languages (PPLs) have emerged, such as Bayesian Logic (BLOG), Church, and Figaro. These languages can be classified into two categories: PPLs interpreted using possible-world semantics and ones using random-evaluation semantics. 
In this paper, we explicitly analyze the equivalence between these two semantics in the context of open-universe probability models (OUPMs). We propose a novel dynamic memoization technique to construct OUPMs using procedural instructions in random-evaluation based PPLs. We implemented a translator named BFiT, which converts code in BLOG (possible-world based) to Figaro (random-evaluation based). The translated program in Figaro exhibits a merely constant blowup factor in program size while yielding the same inference results as the original model in BLOG. BibTeX: @inproceedings{wu2014bfit, author = {Wu, Yi and Li, Lei and Russell, Stuart J.}, title = {BFiT: From Possible-World Semantics to Random-Evaluation Semantics in Open Universe}, booktitle = {Neural Information Processing Systems, Probabilistic Programming workshop}, year = {2014} }  Simon Shaolei Du, Yilin Liu, Boyi Chen and Lei Li, "Maxios: Large Scale Nonnegative Matrix Factorization for Collaborative Filtering", In Neural Information Processing Systems, workshop on Distributed Machine Learning and Matrix Computations, 2014. Abstract: Nonnegative matrix factorization proved useful in many applications, including collaborative filtering – from existing ratings data one would like to predict new product ratings by users. However, factorizing a user-product score matrix is computation and memory intensive. We propose Maxios, a novel approach to fill missing values for large scale and highly sparse matrices efficiently and accurately. We formulate the matrix-completion problem as weighted nonnegative matrix factorization. In addition, we develop distributed update rules using alternating direction method of multipliers. We have implemented the Maxios system on top of Spark, a distributed in-memory computation framework. Experiments on commercial clusters show that Maxios is competitive in terms of scalability and accuracy against the existing solutions on a variety of datasets. 
BibTeX: @inproceedings{du2014maxios, author = {Du, Simon Shaolei and Liu, Yilin and Chen, Boyi and Li, Lei}, title = {Maxios: Large Scale Nonnegative Matrix Factorization for Collaborative Filtering}, booktitle = {Neural Information Processing Systems, workshop on Distributed Machine Learning and Matrix Computations}, year = {2014} }  Da-Cheng Juan, Lei Li, Huan-Kai Peng, Diana Marculescu and Christos Faloutsos, "Beyond Poisson: Modeling Inter-Arrival Times of Requests in a Datacenter", In The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), 2014. BibTeX: @inproceedings{juan2014poisson, author = {Juan, Da-Cheng and Li, Lei and Peng, Huan-Kai and Marculescu, Diana and Faloutsos, Christos}, title = {Beyond Poisson: Modeling Inter-Arrival Times of Requests in a Datacenter}, booktitle = {The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)}, year = {2014} }  Lei Li, Bharath Ramsundar and Stuart Russell, "Dynamic Scaled Sampling for Deterministic Constraints", In 16th International Conference on Artificial Intelligence and Statistics (AISTATS), 2013. Abstract: Deterministic and near-deterministic relationships among subsets of random variables in multivariate systems are known to cause serious problems for Monte Carlo algorithms. We examine the case in which the relationship Z = f(X1,...,Xk) holds, where each Xi has a continuous prior pdf and we wish to obtain samples from the conditional distribution P(X1,...,Xk | Z = s). When f is addition, the problem is NP-hard even when the Xi are independent. In more restricted cases—for example, i.i.d. Boolean or categorical Xi—efficient exact samplers have been obtained previously. For the general continuous case, we propose a dynamic scaling algorithm (DYSC), and prove that it has O(k) expected running time and finite variance. We discuss generalizations of DYSC to functions f described by binary operation trees. We evaluate the algorithm on several examples. 
BibTeX: @inproceedings{li2013dynamic, author = {Li, Lei and Ramsundar, Bharath and Russell, Stuart}, title = {Dynamic Scaled Sampling for Deterministic Constraints}, booktitle = {16th International Conference on Artificial Intelligence and Statistics (AISTATS)}, year = {2013} }  Sharad Vikram, Lei Li and Stuart Russell, "Handwriting and Gestures in the Air, Recognizing on the Fly", In ACM Conference on Human Factors in Computing Systems (CHI) Extended Abstracts, 2013. Abstract: Recent technologies in vision sensors are capable of capturing 3D finger positions and movements. We propose a novel way to control and interact with computers by moving fingers in the air. The positions of fingers are precisely captured by a computer vision device. By tracking the moving patterns of fingers, we can then recognize users’ intended control commands or input information. We demonstrate this human input approach through an example application of handwriting recognition. By treating the input as a time series of 3D positions, we propose a fast algorithm using dynamic time warping to recognize characters in online fashion. We employ various optimization techniques to recognize in real time as one writes. Experiments show promising recognition performance and speed. BibTeX: @inproceedings{vikram2013handwriting, author = {Vikram, Sharad and Li, Lei and Russell, Stuart}, title = {Handwriting and Gestures in the Air, Recognizing on the Fly}, booktitle = {ACM Conference on Human Factors in Computing Systems (CHI) Extended Abstracts}, year = {2013} }  Siyuan Liu, Lei Li and Ramayya Krishnan, "Hibernating Process: Modelling Mobile Calls at Multiple Scales", In IEEE International Conference on Data Mining (ICDM), 2013. 
BibTeX: @inproceedings{liu2013hibernating, author = {Liu, Siyuan and Li, Lei and Krishnan, Ramayya}, title = {Hibernating Process: Modelling Mobile Calls at Multiple Scales}, booktitle = {IEEE International Conference on Data Mining (ICDM)}, year = {2013} }  Yusuf Erol, Lei Li, Bharath Ramsundar and Stuart J. Russell, "The Extended Parameter Filter", In Proceedings of the 30th International Conference on Machine learning (ICML), 2013. Abstract: The parameters of temporal models, such as dynamic Bayesian networks, may be modelled in a Bayesian context as static or atemporal variables that influence transition probabilities at every time step. Particle filters fail for models that include such variables, while methods that use Gibbs sampling of parameter variables may incur a per-sample cost that grows linearly with the length of the observation sequence. Storvik devised a method for incremental computation of exact sufficient statistics that, for some cases, reduces the per-sample cost to a constant. In this paper, we demonstrate a connection between Storvik's filter and a Kalman filter in parameter space and establish more general conditions under which Storvik's filter works. Drawing on an analogy to the extended Kalman filter, we develop and analyze, both theoretically and experimentally, a Taylor approximation to the parameter posterior that allows Storvik's method to be applied to a broader class of models. Our experiments on both synthetic examples and real applications show improvement over existing methods. 
BibTeX: @inproceedings{erol2013extended, author = {Erol, Yusuf and Li, Lei and Ramsundar, Bharath and Russell, Stuart J.}, title = {The Extended Parameter Filter}, booktitle = {Proceedings of the 30th International Conference on Machine learning (ICML)}, year = {2013} }  Bin Fu, Jialiu Lin, Lei Li, Christos Faloutsos, Jason Hong and Norman Sadeh, "Why People Hate Your App - Making Sense of User Feedback in a Mobile App Store", In the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), New York, NY, USA ACM, 2013. Abstract: User review is a crucial component of open mobile app markets such as the Google Play Store. How do we automatically summarize millions of user reviews and make sense out of them? Unfortunately, beyond simple summaries such as histograms of user ratings, there are few analytic tools that can provide insights into user reviews. In this paper, we propose WisCom, a system that can analyze tens of millions user ratings and comments in mobile app markets at three different levels of detail. Our system is able to (a) discover inconsistencies in reviews; (b) identify reasons why users like or dislike a given app, and provide an interactive, zoomable view of how users’ reviews evolve over time; and (c) provide valuable insights into the entire app market, identifying users’ major concerns and preferences of different types of apps. Results using our techniques are reported on a 32GB dataset consisting of over 13 million user reviews of 171,493 Android apps in the Google Play Store. We discuss how the techniques presented herein can be deployed to help a mobile app market operator such as Google as well as individual app developers and end-users. 
BibTeX: @inproceedings{fu2013why, author = {Fu, Bin and Lin, Jialiu and Li, Lei and Faloutsos, Christos and Hong, Jason and Sadeh, Norman}, title = {Why People Hate Your App - Making Sense of User Feedback in a Mobile App Store}, booktitle = {the 19th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD)}, publisher = {ACM}, year = {2013} }  Mark Rogers, Lei Li and Stuart J. Russell, "Multilinear Dynamical Systems for Tensor Time Series", In the 27th Conference on Neural Information Processing Systems (NeurIPS), 2013. BibTeX: @inproceedings{rogers2013multilinear, author = {Rogers, Mark and Li, Lei and Russell, Stuart J.}, title = {Multilinear Dynamical Systems for Tensor Time Series}, booktitle = {the 27th Conference on Neural Information Processing Systems (NeurIPS)}, year = {2013} }  Yasuko Matsubara, Lei Li, Evangelos E. Papalexakis, David Lo, Yasushi Sakurai and Christos Faloutsos, "F-Trail: Finding Patterns in Taxi Trajectories", In The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD), pp. 86-98., 2013. BibTeX: @inproceedings{matsubara2013f, author = {Matsubara, Yasuko and Li, Lei and Papalexakis, Evangelos E. and Lo, David and Sakurai, Yasushi and Faloutsos, Christos}, title = {F-Trail: Finding Patterns in Taxi Trajectories}, booktitle = {The Pacific-Asia Conference on Knowledge Discovery and Data Mining (PAKDD)}, year = {2013}, pages = {86--98} }  Lei Li and Stuart J. Russell, "The BLOG Language Reference". EECS Department, University of California, Berkeley, Technical Report UCB/EECS-2013-51, May, 2013. Abstract: This document introduces the syntax of BLOG, a probabilistic programming language, for describing random variables and their probabilistic dependencies. BLOG defines probabilistic generative models over first-order structures. For example, all Bayesian networks can be easily described by BLOG. 
BLOG has the following features: (a) it employs open-universe semantics; (b) it can describe relational uncertainty; (c) it can handle identity uncertainty; and (d) it is empowered by first-order logic. The syntax as described in this document corresponds to BLOG version 0.6. The current version represents a significant redesign and extension to previous versions of BLOG, based on the principles of usability and implementation efficiency. BibTeX: @techreport{li2013blog, author = {Li, Lei and Russell, Stuart J.}, title = {The BLOG Language Reference}, school = {EECS Department, University of California, Berkeley}, year = {2013}, number = {UCB/EECS-2013-51} }  Keith Henderson, Brian Gallagher, Tina Eliassi-Rad, Hanghang Tong, Sugato Basu, Leman Akoglu, Danai Koutra, Christos Faloutsos and Lei Li, "RolX: Structural Role Extraction and Mining in Large Graphs", In Proceeding of the 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), New York, NY, USA ACM, 2012. BibTeX: @inproceedings{henderson2012rolx, author = {Henderson, Keith and Gallagher, Brian and Eliassi-Rad, Tina and Tong, Hanghang and Basu, Sugato and Akoglu, Leman and Koutra, Danai and Faloutsos, Christos and Li, Lei}, title = {RolX: Structural Role Extraction and Mining in Large Graphs}, booktitle = {Proceeding of the 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD)}, publisher = {ACM}, year = {2012} }  Yasuko Matsubara, Yasushi Sakurai, B. Aditya Prakash, Lei Li and Christos Faloutsos, "Rise and Fall Patterns of Information Diffusion: Model and Implications", In Proceeding of the 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), New York, NY, USA ACM, 2012. BibTeX: @inproceedings{matsubara2012rise, author = {Matsubara, Yasuko and Sakurai, Yasushi and Prakash, B. 
Aditya and Li, Lei and Faloutsos, Christos}, title = {Rise and Fall Patterns of Information Diffusion: Model and Implications}, booktitle = {Proceeding of the 18th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD)}, publisher = {ACM}, year = {2012} }  Siyuan Liu, Lei Li, Christos Faloutsos and Lionel Ni, "Mobile Phone Graph Evolution: Findings, Model and Interpretation", In IEEE International Conference on Data Mining, workshop on Data Mining Technologies for Computational Collective Intelligence, 2011. BibTeX: @inproceedings{liu2011mobile, author = {Liu, Siyuan and Li, Lei and Faloutsos, Christos and Ni, Lionel}, title = {Mobile Phone Graph Evolution: Findings, Model and Interpretation}, booktitle = {IEEE International Conference on Data Mining, workshop on Data Mining Technologies for Computational Collective Intelligence}, year = {2011} }  Keith Henderson, Brian Gallagher, Lei Li, Leman Akoglu, Tina Eliassi-Rad, Hanghang Tong and Christos Faloutsos, "It's Who You Know: Graph Mining Using Recursive Structural Features", In Proceeding of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), New York, NY, USA ACM, 2011. BibTeX: @inproceedings{henderson2011its, author = {Henderson, Keith and Gallagher, Brian and Li, Lei and Akoglu, Leman and Eliassi-Rad, Tina and Tong, Hanghang and Faloutsos, Christos}, title = {It's Who You Know: Graph Mining Using Recursive Structural Features}, booktitle = {Proceeding of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD)}, publisher = {ACM}, year = {2011} }  Lei Li, Chieh-Jan Mike Liang, Jie Liu, Suman Nath, Andreas Terzis and Christos Faloutsos, "ThermoCast: A Cyber-Physical Forecasting Model for Data Centers", In Proceeding of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD), New York, NY, USA ACM, 2011. 
Abstract: Efficient thermal management is important in modern data centers as cooling consumes up to 50% of the total energy. Unlike previous work, we consider proactive thermal management, whereby servers can predict potential overheating events due to dynamics in data center configuration and workload, giving operators enough time to react. However, such forecasting is very challenging due to data center scales and complexity. Moreover, such a physical system is influenced by cyber effects, including workload scheduling in servers. We propose ThermoCast, a novel thermal forecasting model to predict the temperatures surrounding the servers in a data center, based on continuous streams of temperature and airflow measurements. Our approach is (a) capable of capturing cyber-physical interactions and automatically learning them from data; (b) computationally and physically scalable to data center scales; (c) able to provide online prediction with real-time sensor measurements. The paper’s main contributions are: (i) We provide a systematic approach to integrate physical laws and sensor observations in a data center; (ii) We provide an algorithm that uses sensor data to learn the parameters of a data center’s cyber-physical system. In turn, this ability enables us to reduce model complexity compared to full-fledged fluid dynamics models, while maintaining forecast accuracy; (iii) Unlike previous simulation-based studies, we perform experiments in a production data center. Using real data traces, we show that ThermoCast forecasts temperature 2× better than a machine learning approach solely driven by data, and can successfully predict thermal alarms 4.2 minutes ahead of time. 
BibTeX: @inproceedings{li2011thermocast, author = {Li, Lei and Liang, Chieh-Jan Mike and Liu, Jie and Nath, Suman and Terzis, Andreas and Faloutsos, Christos}, title = {ThermoCast: A Cyber-Physical Forecasting Model for Data Centers}, booktitle = {Proceeding of the 17th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining (KDD)}, publisher = {ACM}, year = {2011} }  Lei Li and B. Aditya Prakash, "Time Series Clustering: Complex is Simpler!", In Proceedings of the 28th International Conference on Machine Learning (ICML), Bellevue, Washington, 2011. BibTeX: @inproceedings{li2011time, author = {Li, Lei and Prakash, B. Aditya}, title = {Time Series Clustering: Complex is Simpler!}, booktitle = {Proceedings of the 28th International Conference on Machine Learning (ICML)}, year = {2011} }  Yasushi Sakurai, Lei Li, Yasuko Matsubara and Christos Faloutsos, "WindMine: Fast and Effective Mining of Web-click Sequences", In SIAM International Conference on Data Mining (SDM), 2011. BibTeX: @inproceedings{sakurai2011windmine, author = {Sakurai, Yasushi and Li, Lei and Matsubara, Yasuko and Faloutsos, Christos}, title = {WindMine: Fast and Effective Mining of Web-click Sequences}, booktitle = {SIAM International Conference on Data Mining (SDM)}, year = {2011} }  Lei Li, "Fast algorithms for mining co-evolving time series". Ph.D. Dissertation, Carnegie Mellon University. Available as technical report CMU-CS-11-127, 2011. BibTeX: @phdthesis{li2011fast, author = {Li, Lei}, title = {Fast algorithms for mining co-evolving time series}, school = {Carnegie Mellon University}, year = {2011} }  Lei Li, "Fast Algorithms for Time Series Mining", In 26th IEEE International Conference on Data Engineering, PHD Workshop, pp. 341-344., 2010. 
BibTeX: @inproceedings{li2010fast, author = {Li, Lei}, title = {Fast Algorithms for Time Series Mining}, booktitle = {26th IEEE International Conference on Data Engineering, PHD Workshop}, year = {2010}, pages = {341--344} }  Keith Henderson, Tina Eliassi-Rad, Christos Faloutsos, Leman Akoglu, Lei Li, Koji Maruhashi, B. Aditya Prakash and Hanghang Tong, "Metric forensics: a multi-level approach for mining volatile graphs", In Proceedings of the 16th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD), New York, NY, USA, pp. 163-172. ACM, 2010. BibTeX: @inproceedings{henderson2010metric, author = {Henderson, Keith and Eliassi-Rad, Tina and Faloutsos, Christos and Akoglu, Leman and Li, Lei and Maruhashi, Koji and Prakash, B. Aditya and Tong, Hanghang}, title = {Metric forensics: a multi-level approach for mining volatile graphs}, booktitle = {Proceedings of the 16th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD)}, publisher = {ACM}, year = {2010}, pages = {163--172}, doi = {https://doi.org/10.1145/1835804.1835828} }  Lei Li, James McCann, Nancy Pollard and Christos Faloutsos, "BoLeRO: a principled technique for including bone length constraints in motion capture occlusion filling", In Proceedings of the 2010 ACM SIGGRAPH/Eurographics Symposium on Computer Animation (SCA), Aire-la-Ville, Switzerland, pp. 179-188. Eurographics Association, 2010. 
BibTeX: @inproceedings{li2010bolero, author = {Li, Lei and McCann, James and Pollard, Nancy and Faloutsos, Christos}, title = {BoLeRO: a principled technique for including bone length constraints in motion capture occlusion filling}, booktitle = {Proceedings of the 2010 ACM SIGGRAPH/Eurographics Symposium on Computer Animation (SCA)}, publisher = {Eurographics Association}, year = {2010}, pages = {179--188} }  Lei Li, Bin Fu and Christos Faloutsos, "Efficient Parallel Learning of Hidden Markov chain Models on SMPs", IEICE Transactions on Information and Systems, Volume E93.D(6), pp. 1330-1342., 2010. Abstract: Quad-core cpus have been a common desktop configuration for today’s office. The increasing number of processors on a single chip opens new opportunity for parallel computing. Our goal is to make use of the multi-core as well as multi-processor architectures to speed up large-scale data mining algorithms. In this paper, we present a general parallel learning framework, Cut-And-Stitch, for training hidden Markov chain models. Particularly, we propose two model-specific variants, CAS-LDS for learning linear dynamical systems (LDS) and CAS-HMM for learning hidden Markov models (HMM). Our main contribution is a novel method to handle the data dependencies due to the chain structure of hidden variables, so as to parallelize the EM-based parameter learning algorithm. We implement CAS-LDS and CAS-HMM using OpenMP on two supercomputers and a quad-core commercial desktop. The experimental results show that parallel algorithms using Cut-And-Stitch achieve comparable accuracy and almost linear speedups over the traditional serial version. BibTeX: @article{li2010efficient, author = {Li, Lei and Fu, Bin and Faloutsos, Christos}, title = {Efficient Parallel Learning of Hidden Markov chain Models on SMPs}, journal = {IEICE Transactions on Information and Systems}, year = {2010}, volume = {E93.D}, number = {6}, pages = {1330--1342} }  Lei Li, B. 
Aditya Prakash and Christos Faloutsos, "Parsimonious linear fingerprinting for time series", The Proceedings of the Very Large Data Bases Endowment (VLDB), Volume 3, pp. 385-396. VLDB Endowment, 2010. BibTeX: @article{li2010parsimonious, author = {Li, Lei and Prakash, B. Aditya and Faloutsos, Christos}, title = {Parsimonious linear fingerprinting for time series}, journal = {The Proceedings of the Very Large Data Bases Endowment (VLDB)}, publisher = {VLDB Endowment}, year = {2010}, volume = {3}, pages = {385--396} }  Lei Li, James McCann, Nancy Pollard and Christos Faloutsos, "DynaMMo: Mining and Summarization of Coevolving Sequences with Missing Values", In Proceeding of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD), New York, NY, USA ACM, 2009. BibTeX: @inproceedings{li2009dynammo, author = {Li, Lei and McCann, James and Pollard, Nancy and Faloutsos, Christos}, title = {DynaMMo: Mining and Summarization of Coevolving Sequences with Missing Values}, booktitle = {Proceeding of the 15th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD)}, publisher = {ACM}, year = {2009} }  Fan Guo, Lei Li and Christos Faloutsos, "Tailoring click models to user goals", In Proceedings of the 2009 workshop on Web Search Click Data, New York, NY, USA, pp. 88-92. ACM, 2009. BibTeX: @inproceedings{guo2009tailoring, author = {Guo, Fan and Li, Lei and Faloutsos, Christos}, title = {Tailoring click models to user goals}, booktitle = {Proceedings of the 2009 workshop on Web Search Click Data}, publisher = {ACM}, year = {2009}, pages = {88--92}, doi = {https://doi.org/10.1145/1507509.1507523} }  Wanhong Xu, Xi Zhou and Lei Li, "Inferring privacy information via social relations", In IEEE 24th International Conference on Data Engineering workshops, pp. 525-530., 2008. 
BibTeX: @inproceedings{xu2008inferring, author = {Xu, Wanhong and Zhou, Xi and Li, Lei}, title = {Inferring privacy information via social relations}, booktitle = {IEEE 24th International Conference on Data Engineering workshops}, year = {2008}, pages = {525--530}, doi = {https://doi.org/10.1109/ICDEW.2008.4498373} }  Lei Li, Wenjie Fu, Fan Guo, Todd C. Mowry and Christos Faloutsos, "Cut-and-Stitch: efficient parallel learning of linear dynamical systems on smps", In Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD), New York, NY, USA, pp. 471-479. ACM, 2008. Abstract: Multi-core processors with ever increasing number of cores per chip are becoming prevalent in modern parallel computing. Our goal is to make use of the multi-core as well as multi-processor architectures to speed up data mining algorithms. Specifically, we present a parallel algorithm for approximate learning of Linear Dynamical Systems (LDS), also known as Kalman Filters (KF). LDSs are widely used in time series analysis such as motion capture modeling and visual tracking etc. We propose Cut-And-Stitch (CAS), a novel method to handle the data dependencies due to the chain structure of hidden variables in LDS, so as to parallelize the EM-based parameter learning algorithm. We implement the algorithm using OpenMP on both a supercomputer and a quad-core commercial desktop. The experimental results show that parallel algorithms using Cut-And-Stitch achieve comparable accuracy and almost linear speedups over the serial version. In addition, Cut-And-Stitch can be generalized to other models with similar linear structures such as Hidden Markov Models (HMM) and Switching Kalman Filters (SKF). BibTeX: @inproceedings{li2008cut, author = {Li, Lei and Fu, Wenjie and Guo, Fan and Mowry, Todd C. 
and Faloutsos, Christos}, title = {Cut-and-Stitch: efficient parallel learning of linear dynamical systems on smps}, booktitle = {Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining (KDD)}, publisher = {ACM}, year = {2008}, pages = {471--479} }  Yasushi Sakurai, Rosalynn Chong, Lei Li and Christos Faloutsos, "Efficient Distribution Mining and Classification", In SIAM International Conference on Data Mining (SDM), pp. 632-643., 2008. BibTeX: @inproceedings{sakurai2008efficient, author = {Sakurai, Yasushi and Chong, Rosalynn and Li, Lei and Faloutsos, Christos}, title = {Efficient Distribution Mining and Classification}, booktitle = {SIAM International Conference on Data Mining (SDM)}, year = {2008}, pages = {632--643} }  Lei Li, James McCann, Christos Faloutsos and Nancy Pollard, "Laziness is a virtue: Motion stitching using effort minimization", In The 29th Annual Conference of the European Association for Computer Graphics (EG), Short Paper Proceedings, 2008. BibTeX: @inproceedings{li2008laziness, author = {Li, Lei and McCann, James and Faloutsos, Christos and Pollard, Nancy}, title = {Laziness is a virtue: Motion stitching using effort minimization}, booktitle = {The 29th Annual Conference of the European Association for Computer Graphics (EG), Short Paper Proceedings}, year = {2008} }  Fan Guo, Lei Li, Christos Faloutsos and Eric P. Xing, "C-DEM: a multi-modal query system for Drosophila Embryo databases", The Proceedings of the Very Large Data Bases Endowment (VLDB), Volume 1, pp. 1508-1511. VLDB Endowment, 2008. 
BibTeX: @article{guo2008c, author = {Guo, Fan and Li, Lei and Faloutsos, Christos and Xing, Eric P.}, title = {C-DEM: a multi-modal query system for Drosophila Embryo databases}, journal = {The Proceedings of the Very Large Data Bases Endowment (VLDB)}, publisher = {VLDB Endowment}, year = {2008}, volume = {1}, pages = {1508--1511} }  Lei Li, Qiaoling Liu, Yunfeng Tao, Lei Zhang, Jian Zhou and Yong Yu, "Providing an Uncertainty Reasoning Service for Semantic Web Application", In Asia-Pacific Web Conference, pp. 628-639., 2006. BibTeX: @inproceedings{li2006providing, author = {Li, Lei and Liu, Qiaoling and Tao, Yunfeng and Zhang, Lei and Zhou, Jian and Yu, Yong}, title = {Providing an Uncertainty Reasoning Service for Semantic Web Application}, booktitle = {Asia-Pacific Web Conference}, year = {2006}, pages = {628--639} } 

Created by JabRef on 2020/07/03. Return to Home