% Aggarwal et al., "Approximation Algorithms for k-Anonymity" (2005).
% NOTE(review): this record carries both a `booktitle` (ICDT 2005 proceedings)
% and a `journal` (Journal of Privacy Technology). The `journal` field is kept
% so no data is lost, but it is presumably not printed for this entry type by
% the standard styles -- confirm which venue is intended and adjust the entry
% type if the journal version is the one being cited.
@inproceedings{ilprints645,
  author    = {Aggarwal, Gagan and Feder, Tomas and Kenthapadi, Krishnaram and Motwani, Rajeev and Panigrahy, Rina and Thomas, Dilys and Zhu, An},
  title     = {Approximation Algorithms for {k-Anonymity}},
  booktitle = {Proceedings of the International Conference on Database Theory (ICDT 2005)},
  journal   = {Journal of Privacy Technology (JOPT)},
  year      = {2005},
  month     = nov,
  url       = {http://ilpubs.stanford.edu:8090/645/},
  abstract  = {We consider the problem of releasing a table containing personal records, while ensuring individual privacy and maintaining data integrity to the extent possible. One of the techniques proposed in the literature is k-anonymization. A release is considered k-anonymous if the information corresponding to any individual in the release cannot be distinguished from that of at least k-1 other individuals whose information also appears in the release. In order to achieve k-anonymization, some of the entries of the table are either suppressed or generalized (e.g. an Age value of 23 could be changed to the Age range 20-25). The goal is to lose as little information as possible while ensuring that the release is k-anonymous. This optimization problem is referred to as the k-Anonymity problem. We show that the k-Anonymity problem is NP-hard even when the attribute values are ternary and we are allowed only to suppress entries. On the positive side, we provide an O(k)-approximation algorithm for the problem. We also give improved positive results for the interesting cases with specific values of k --- in particular, we give a 1.5-approximation algorithm for the special case of 2-Anonymity, and a 2-approximation algorithm for 3-Anonymity.},
}