@comment{Zhang2004 is a chapter in a collection, so the container title is stored in booktitle rather than journal. The publisher value is inferred from the springerlink URL and should be confirmed against the published volume.}
@incollection{Zhang2004,
  author               = {Zhang, Yongzheng and Zincir-Heywood, Nur and Milios, Evangelos},
  title                = {Term-Based Clustering and Summarization of {Web} Page Collections},
  booktitle            = {Advances in Artificial Intelligence},
  publisher            = {Springer},
  pages                = {60--74},
  year                 = {2004},
  url                  = {http://www.springerlink.com/content/x8e6l39ltdyq78pv},
  abstract             = {Effectively summarizing Web page collections becomes more and more critical as the amount of information continues to grow on the World Wide Web. A concise and meaningful summary of a Web page collection, which is generated automatically, can help Web users understand the essential topics and main contents covered in the collection quickly without spending much browsing time. However, automatically generating coherent summaries as good as human-authored summaries is a challenging task since Web page collections often contain diverse topics and contents. This research aims towards clustering of Web page collections using automatically extracted topical terms, and automatic summarization of the resulting clusters. We experiment with word- and term-based representations of Web documents and demonstrate that term-based clustering significantly outperforms word-based clustering with much lower dimensionality. The summaries of computed clusters are informative and meaningful, which indicates that clustering and summarization of large Web page collections is promising for alleviating the information overload problem.},
  citeulike-article-id = {4849263},
  posted-at            = {2009-06-15 00:21:47},
  priority             = {2},
}