dc.creatorAlmeida T.A.
dc.creatorYamakami A.
dc.date2012
dc.date2015-06-25T20:24:37Z
dc.date2015-11-26T15:20:12Z
dc.date2015-06-25T20:24:37Z
dc.date2015-11-26T15:20:12Z
dc.date.accessioned2018-03-28T22:29:42Z
dc.date.available2018-03-28T22:29:42Z
dc.identifier
dc.identifierJournal Of Internet Services And Applications. , v. 3, n. 3, p. 245 - 253, 2012.
dc.identifier18674828
dc.identifier10.1007/s13174-012-0067-x
dc.identifierhttp://www.scopus.com/inward/record.url?eid=2-s2.0-84888619395&partnerID=40&md5=d97d7ad953d447dbce0b4f5ef93262cd
dc.identifierhttp://www.repositorio.unicamp.br/handle/REPOSIP/90271
dc.identifierhttp://repositorio.unicamp.br/jspui/handle/REPOSIP/90271
dc.identifier2-s2.0-84888619395
dc.identifier.urihttp://repositorioslatinoamericanos.uchile.cl/handle/2250/1259922
dc.descriptionNowadays e-mail spam is not a novelty, but it is still an important rising problem with a big economic impact in society. Spammers manage to circumvent current spam filters and harm the communication system by consuming several resources, damaging the reliability of e-mail as a communication instrument and tricking recipients to react to spam messages. Consequently, spam filtering poses a special problem in text categorization, of which the defining characteristic is that filters face an active adversary, which constantly attempts to evade filtering. In this paper, we present a novel approach to spam filtering based on the minimum description length principle. Furthermore, we have conducted an empirical experiment on six public and real non-encoded datasets. The results indicate that the proposed filter is fast to construct, incrementally updateable and clearly outperforms the state-of-the-art spam filters. © The Brazilian Computer Society 2012.
dc.description3
dc.description3
dc.description245
dc.description253
dc.descriptionAlmeida, T., Yamakami, A., Content-based spam filtering (2010) Proceedings of the 23rd IEEE International Joint Conference On Neural Networks, pp. 1-7. , Barcelona, Spain
dc.descriptionAlmeida, T., Yamakami, A., Redução de Dimensionalidade Aplicada na Classificação de Spams Usando Filtros Bayesianos (2011) Revista Brasileira De Computação Aplicada, 3 (1), pp. 16-29
dc.descriptionAlmeida, T., Yamakami, A., Almeida, J., Evaluation of approaches for dimensionality reduction applied with Naive Bayes anti-spam filters (2009) Proceedings of the 8th IEEE International Conference On Machine Learning and Applications, pp. 517-522. , Miami, FL, USA
dc.descriptionAlmeida, T., Yamakami, A., Almeida, J., Filtering spams using the minimum description length principle (2010) Proceedings of the 25th ACM Symposium On Applied Computing, pp. 1856-1860. , Sierre, Switzerland
dc.descriptionAlmeida, T., Yamakami, A., Almeida, J., Probabilistic antispam filtering with dimensionality reduction (2010) Proceedings of the 25th ACM Symposium On Applied Computing, pp. 1804-1808. , Sierre, Switzerland
dc.descriptionAlmeida, T., Hidalgo, J.G., Yamakami, A., Contributions to the study of SMS spam filtering: New collection and results (2011) Proceedings of the 2011 ACM Symposium On Document Engineering, pp. 259-262. , Mountain View, CA, USA
dc.descriptionAlmeida, T., Almeida, J., Yamakami, A., Spam filtering: How the dimensionality reduction affects the accuracy of Naive Bayes classifiers (2011) J Internet Serv Appl, 1 (3), pp. 183-200
dc.descriptionAlmeida, T.A., Yamakami, A., Advances in spam filtering techniques (2012) Computational Intelligence For Privacy and Security. Studies In Computational Intelligence, 394, pp. 199-214. , In: Elizondo D, Solanas A, Martinez-Balleste A (eds), Springer, Berlin
dc.descriptionAlmeida, T.A., Yamakami, A., Facing the spammers: A very effective approach to avoid junk e-mails (2012) Expert Syst Appl, pp. 1-5
dc.descriptionAnagnostopoulos, A., Broder, A., Punera, K., Effective and efficient classification on a search-engine model (2008) Knowl Inf Syst, 16 (2), pp. 129-154
dc.descriptionAndroutsopoulos, I., Koutsias, J., Chandrinos, K., Paliouras, G., Spyropoulos, C., An evaluation of Naive Bayesian anti-spam filtering (2000) Proceedings of the 11th European Conference On Machine Learning, pp. 9-17. , Barcelona, Spain
dc.descriptionAndroutsopoulos, I., Paliouras, G., Karkaletsis, V., Sakkis, G., Spyropoulos, C., Stamatopoulos, P., Learning to filter spam e-mail: A comparison of a Naive Bayesian and a memory-based approach (2000) Proceedings of the 4th European Conference On Principles and Practice of Knowledge Discovery In Databases, pp. 1-13. , Lyon, France
dc.descriptionAndroutsopoulos, I., Paliouras, G., Michelakis, E., (2004) Learning to Filter Unsolicited Commercial E-mail, , Technical Report 2004/2, National Centre for Scientific Research "Demokritos", Athens, Greece
dc.descriptionBaldi, P., Brunak, S., Chauvin, Y., Andersen, C., Nielsen, H., Assessing the accuracy of prediction algorithms for classification: An overview (2000) Bioinformatics, 16 (5), pp. 412-424
dc.descriptionBarron, A., Rissanen, J., Yu, B., The minimum description length principle in coding and modeling (1998) IEEE Trans Inf Theory, 44 (6), pp. 2743-2760
dc.descriptionBlanzieri, E., Bryl, A., A survey of learning-based techniques of email spam filtering (2008) Artif Intell Rev, 29 (1), pp. 335-455
dc.descriptionBordes, A., Ertekin, S., Weston, J., Bottou, L., Fast kernel classifiers with online and active learning (2005) J Mach Learn Res, 6, pp. 1579-1619
dc.descriptionBratko, A., Cormack, G., Filipic, B., Lynam, T., Zupan, B., Spam filtering using statistical data compression models (2006) J Mach Learn Res, 7, pp. 2673-2698
dc.descriptionCarreras, X., Marquez, L., Boosting trees for anti-spam email filtering (2001) Proceedings of the 4th International Conference On Recent Advances In Natural Language Processing, pp. 58-64. , Tzigov Chark, Bulgaria
dc.descriptionCohen, W., Fast effective rule induction (1995) Proceedings of 12th International Conference On Machine Learning, pp. 115-123. , Tahoe City, CA, USA
dc.descriptionCohen, W., Learning rules that classify e-mail (1996) Proceedings of the AAAI Spring Symposium On Machine Learning In Information Access, pp. 18-25. , CA, USA, Stanford
dc.descriptionCormack, G., Email spam filtering: A systematic review (2008) Found Trends Inf Retr, 1 (4), pp. 335-455
dc.descriptionCormack, G., Lynam, T., Online supervised spam filter evaluation (2007) ACM Trans Inf Syst, 25 (3), pp. 1-11
dc.descriptionCzarnowski, I., Cluster-based instance selection for machine classification (2011) Knowl Inf Syst
dc.descriptionDrucker, H., Wu, D., Vapnik, V., Support vector machines for spam categorization (1999) IEEE Trans Neural Netw, 10 (5), pp. 1048-1054
dc.descriptionForman, G., Scholz, M., Rajaram, S., Feature shaping for linear SVM classifiers (2009) Proceedings of the 15th ACM SIGKDD International Conference On Knowledge Discovery and Data Mining, pp. 299-308. , France, Paris
dc.descriptionFrank, E., Chui, C., Witten, I., Text categorization using compression models (2000) Proceedings of the 10th Data Compression Conference, pp. 555-565. , Snowbird, UT, USA
dc.descriptionGrünwald, P., A tutorial introduction to the minimum description length principle (2005) Advances In Minimum Description Length: Theory and Applications, pp. 3-81. , In: Grünwald P, Myung I, Pitt M (eds), MIT Press, Cambridge
dc.descriptionGuzella, T., Caminhas, W., A review of machine learning approaches to spam filtering (2009) Expert Syst Appl, 36 (7), pp. 10206-10222
dc.descriptionHidalgo, J., Evaluating cost-sensitive unsolicited bulk mail categorization (2002) Proceedings of the 17th ACM Symposium On Applied Computing, pp. 615-620. , Madrid, Spain
dc.descriptionJoachims, T., A probabilistic analysis of the Rocchio algorithm with TFIDF for text categorization (1997) Proceedings of 14th International Conference On Machine Learning, pp. 143-151. , Nashville, TN, USA
dc.descriptionJohn, G., Langley, P., Estimating continuous distributions in Bayesian classifiers (1995) Proceedings of the 11th International Conference On Uncertainty In Artificial Intelligence, pp. 338-345. , Montreal, Canada
dc.descriptionKatakis, I., Tsoumakas, G., Vlahavas, I., Tracking recurring contexts using ensemble classifiers: An application to email filtering (2009) Knowl Inf Syst, 22 (3), pp. 371-391
dc.descriptionKolcz, A., Alspector, J., SVM-based filtering of e-mail spam with content-specific misclassification costs (2001) Proceedings of the 1st International Conference On Data Mining, pp. 1-14. , San Jose, CA, USA
dc.descriptionLosada, D., Azzopardi, L., Assessing multivariate Bernoulli models for information retrieval (2008) ACM Trans Inf Syst, 26 (3), pp. 1-46
dc.descriptionMatthews, B., Comparison of the predicted and observed secondary structure of T4 phage lysozyme (1975) Biochimica Et Biophysica Acta, 405 (2), pp. 442-451
dc.descriptionMcCallum, A., Nigam, K., A comparison of event models for Naive Bayes text classification (1998) Proceedings of the 15th AAAI Workshop On Learning For Text Categorization, pp. 41-48. , Menlo Park, CA, USA
dc.descriptionMetsis, V., Androutsopoulos, I., Paliouras, G., Spam filtering with Naive Bayes-which Naive Bayes? (2006) Proceedings of the 3rd International Conference On Email and Anti-Spam, pp. 1-5. , Mountain View, CA, USA
dc.descriptionPeng, T., Zuo, W., He, F., SVM based adaptive learning method for text classification from positive and unlabeled documents (2008) Knowl Inf Syst, 16 (3), pp. 281-301
dc.descriptionReddy, C., Park, J.-H., Multi-resolution boosting for classification and regression problems (2010) Knowl Inf Syst
dc.descriptionRissanen, J., Modeling by shortest data description (1978) Automatica, 14, pp. 465-471
dc.descriptionSahami, M., Dumais, S., Heckerman, D., Horvitz, E., A Bayesian approach to filtering junk e-mail (1998) Proceedings of the 15th National Conference On Artificial Intelligence, pp. 55-62. , Madison, WI, USA
dc.descriptionSchapire, R., Singer, Y., Singhal, A., Boosting and Rocchio applied to text filtering (1998) Proceedings of the 21st Annual International Conference On Information Retrieval, pp. 215-223. , Melbourne, Australia
dc.descriptionSchneider, K., On word frequency information and negative evidence in Naive Bayes text classification (2004) Proceedings of the 4th International Conference On Advances In Natural Language Processing, pp. 474-485. , Alicante, Spain
dc.descriptionSiefkes, C., Assis, F., Chhabra, S., Yerazunis, W., Combining winnow and orthogonal sparse bigrams for incremental spam filtering (2004) Proceedings of the 8th European Conference On Principles and Practice of Knowledge Discovery In Databases, pp. 410-421. , Pisa, Italy
dc.descriptionSong, Y., Kolcz, A., Giles, C., Better Naive Bayes classification for high-precision spam detection (2009) Softw Pract Experience, 39 (11), pp. 1003-1024
dc.descriptionTeahan, W., Harper, D., Using compression-based language models for text categorization (2001) Proceedings of the 2001 Workshop On Language Modeling and Information Retrieval, pp. 1-5. , Pittsburgh, PA, USA
dc.descriptionWozniak, M., A hybrid decision tree training method using data streams (2010) Knowl Inf Syst
dc.descriptionWu, X., Kumar, V., Quinlan, J., Ghosh, J., Yang, Q., Motoda, H., McLachlan, G., Steinberg, D., Top 10 algorithms in data mining (2008) Knowl Inf Syst, 14 (1), pp. 1-37
dc.descriptionZhang, J., Kang, D., Silvescu, A., Honavar, V., Learning accurate and concise Naive Bayes classifiers from attribute value taxonomies and data (2006) Knowl Inf Syst, 9 (2), pp. 157-179
dc.descriptionZhang, L., Zhu, J., Yao, T., An evaluation of statistical spam filtering techniques (2004) ACM Trans Asian Lang Inf Process, 3 (4), pp. 243-269
dc.languageen
dc.publisher
dc.relationJournal of Internet Services and Applications
dc.rightsaberto
dc.sourceScopus
dc.titleOccam's Razor-based Spam Filter
dc.typeArtículos de revistas


Este ítem pertenece a la siguiente institución