% Workshop paper (MIR 2006) on the filtering mechanism of the Ferret
% content-based similarity search toolkit. The citation key is an
% auto-generated export identifier; it is kept unchanged so existing
% citations keep resolving.
% Conventions used below: brace-delimited values, "Last, First" author names,
% and brace protection for acronyms and proper nouns that styles must not recase.
@inproceedings{d9b30519dabf46b88cd3a89c27d756f0,
  author    = {Lv, Qin and Josephson, William and Wang, Zhe and Charikar, Moses and Li, Kai},
  title     = {Efficient Filtering with Sketches in the {Ferret} Toolkit},
  booktitle = {Proceedings of the 8th {ACM} Multimedia International Workshop on Multimedia Information Retrieval, {MIR} 2006},
  series    = {Proceedings of the {ACM} International Multimedia Conference and Exhibition},
  pages     = {279--288},
  year      = {2006},
  doi       = {10.1145/1178677.1178715},
  isbn      = {1595934952},
  language  = {English (US)},
  keywords  = {Feature-rich data, Filtering, Similarity search, Sketch, Toolkit},
  abstract  = {Ferret is a toolkit for building content-based similarity search systems for feature-rich data types such as audio, video, and digital photos. The key component of this toolkit is a content-based similarity search engine for generic, multi-feature object representations. This paper describes the filtering mechanism used in the Ferret toolkit and experimental results with several datasets. The filtering mechanism uses approximation algorithms to generate a candidate set, and then ranks the objects in the candidate set with a more sophisticated multi-feature distance measure. The paper compared two filtering methods: using segment feature vectors and sketches constructed from segment feature vectors. Our experimental results show that filtering can substantially speedup the search process and reduce memory requirement while maintaining good search quality. To help systems designers choose the filtering parameters, we have developed a rank-based analytical model for the filtering algorithm using sketches. Our experiments show that the model gives conservative and good prediction for different datasets.},
  note      = {8th {ACM} Multimedia International Workshop on Multimedia Information Retrieval, {MIR} 2006, co-located with the 2006 {ACM} International Multimedia Conference; Conference date: 26-10-2006 Through 27-10-2006},
}