The project leverages the Spark distributed framework to implement Affinity Clustering, a hierarchical clustering method, at scale.
More details can be found in the Overleaf link below.
- Get into an environment with Spark installed and configured
- Install dependencies
pip install -r requirements.txt
- Load the network datasets (too heavy to include in the repo)
bash load-network-datasets.sh
- Run the notebook
./notebooks/experiments.ipynb
% Paper describing Affinity Clustering, the hierarchical clustering method this project implements.
@inproceedings{NIPS2017_2e1b24a6,
  author    = {Bateni, Mohammadhossein and Behnezhad, Soheil and Derakhshan, Mahsa and Hajiaghayi, MohammadTaghi and Kiveris, Raimondas and Lattanzi, Silvio and Mirrokni, Vahab},
  title     = {Affinity Clustering: Hierarchical Clustering at Scale},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {Guyon, I. and von Luxburg, U. and Bengio, S. and Wallach, H. and Fergus, R. and Vishwanathan, S. and Garnett, R.},
  volume    = {30},
  publisher = {Curran Associates, Inc.},
  year      = {2017},
  url       = {https://proceedings.neurips.cc/paper_files/paper/2017/file/2e1b24a664f5e9c18f407b2f9c73e821-Paper.pdf},
}
% Network Data Repository (networkrepository.com); presumably the source of the network datasets loaded by this project.
@inproceedings{nr,
  author    = {Rossi, Ryan A. and Ahmed, Nesreen K.},
  title     = {The {Network Data Repository} with Interactive Graph Analytics and Visualization},
  booktitle = {{AAAI}},
  year      = {2015},
  url       = {https://networkrepository.com},
}