from hamu_tool.dataset import DataLoader
# Load the 'beir/msmarco' dataset through the hamu_tool DataLoader.
loader = DataLoader.load('beir/msmarco')

# Print the id and text of the first document only; the break stops the
# iteration right after the first record.
for doc in loader.get_docs():
    print(doc.id, doc.text)
    break

# Same peek for queries: show the first query's id and text, then stop.
for query in loader.get_queries():
    print(query.id, query.text)
    break

# Show the first relevance judgment (query id, document id, score).
# NOTE(review): '[mode]' looks like an unfilled template placeholder --
# presumably it should be replaced with a real qrels split/mode name;
# confirm the accepted values against the hamu_tool documentation.
for qrel in loader.get_qrels('[mode]'):
    print(qrel.qid, qrel.did, qrel.score)
    break
@inproceedings{Bajaj2016Msmarco,
title={MS MARCO: A Human Generated MAchine Reading COmprehension Dataset},
author={Payal Bajaj and Daniel Campos and Nick Craswell and Li Deng and Jianfeng Gao and Xiaodong Liu and Rangan Majumder and Andrew McNamara and Bhaskar Mitra and Tri Nguyen and Mir Rosenberg and Xia Song and Alina Stoica and Saurabh Tiwary and Tong Wang},
booktitle={CoCo@NIPS},
year={2016}
}
@article{Thakur2021Beir,
title = "BEIR: A Heterogenous Benchmark for Zero-shot Evaluation of Information Retrieval Models",
author = "Thakur, Nandan and Reimers, Nils and Rücklé, Andreas and Srivastava, Abhishek and Gurevych, Iryna",
journal= "arXiv preprint arXiv:2104.08663",
month = "4",
year = "2021",
url = "https://arxiv.org/abs/2104.08663",
}