Fix pipeline tag and add library_name

#1
by nielsr HF staff - opened
Files changed (1) hide show
  1. README.md +10 -80
README.md CHANGED
@@ -1,15 +1,16 @@
1
  ---
2
- license: mit
3
- datasets:
4
- - jhu-clsp/rank1-training-data
5
  base_model:
6
  - mistralai/Mistral-Small-24B-Base-2501
7
- pipeline_tag: text-generation
 
 
 
 
 
 
8
  tags:
9
  - reranker
10
  - retrieval
11
- language:
12
- - en
13
  ---
14
 
15
  # rank1-mistral-2501-24b: Test-Time Compute for Reranking in Information Retrieval
@@ -65,66 +66,7 @@ Note that official usage is found on the Github and accounts for edge cases. But
65
  <summary>Click to expand: Minimal example with vLLM</summary>
66
 
67
  ```python
68
- from vllm import LLM, SamplingParams
69
- import math
70
-
71
- # Initialize the model with vLLM
72
- model = LLM(
73
- model="jhu-clsp/rank1-mistral-2501-24b",
74
- tensor_parallel_size=1, # Number of GPUs
75
- trust_remote_code=True,
76
- max_model_len=16000, # Context length
77
- gpu_memory_utilization=0.9,
78
- dtype="float16",
79
- )
80
-
81
- # Set up sampling parameters
82
- sampling_params = SamplingParams(
83
- temperature=0,
84
- max_tokens=8192,
85
- logprobs=20,
86
- stop=["</think> true", "</think> false"],
87
- skip_special_tokens=False
88
- )
89
-
90
- # Prepare the prompt
91
- def create_prompt(query, document):
92
- return (
93
- "Determine if the following passage is relevant to the query. "
94
- "Answer only with 'true' or 'false'.\n"
95
- f"Query: {query}\n"
96
- f"Passage: {document}\n"
97
- "<think>"
98
- )
99
-
100
- # Example usage
101
- query = "What are the effects of climate change?"
102
- document = "Climate change leads to rising sea levels, extreme weather events, and disruptions to ecosystems. These effects are caused by increasing greenhouse gas concentrations in the atmosphere due to human activities."
103
-
104
- # Generate prediction
105
- prompt = create_prompt(query, document)
106
- outputs = model.generate([prompt], sampling_params)
107
-
108
- # Extract score
109
- output = outputs[0].outputs[0]
110
- text = output.text
111
- final_logits = output.logprobs[-1]
112
-
113
- # Get token IDs for "true" and "false" tokens
114
- from transformers import AutoTokenizer
115
- tokenizer = AutoTokenizer.from_pretrained("jhu-clsp/rank1-mistral-2501-24b")
116
- true_token = tokenizer(" true", add_special_tokens=False).input_ids[0]
117
- false_token = tokenizer(" false", add_special_tokens=False).input_ids[0]
118
-
119
- # Calculate relevance score (probability of "true")
120
- true_logit = final_logits[true_token].logprob
121
- false_logit = final_logits[false_token].logprob
122
- true_score = math.exp(true_logit)
123
- false_score = math.exp(false_logit)
124
- relevance_score = true_score / (true_score + false_score)
125
-
126
- print(f"Reasoning chain: {text}")
127
- print(f"Relevance score: {relevance_score}")
128
  ```
129
 
130
  </details>
@@ -144,19 +86,7 @@ Please see the Github for detailed installation instructions.
144
  rank1 is compatible with the [MTEB benchmarking framework](https://github.com/embeddings-benchmark/mteb):
145
 
146
  ```python
147
- from mteb import MTEB
148
- from rank1 import rank1 # From the official repo
149
-
150
- # Initialize the model
151
- model = rank1(
152
- model_name_or_path="jhu-clsp/rank1-mistral-2501-24b",
153
- num_gpus=1,
154
- device="cuda"
155
- )
156
-
157
- # Run evaluation on specific tasks
158
- evaluation = MTEB(tasks=["NevIR"])
159
- results = evaluation.run(model)
160
  ```
161
 
162
  ## Citation
@@ -177,4 +107,4 @@ If you use rank1 in your research, please cite our work:
177
 
178
  ## License
179
 
180
- [MIT License](https://github.com/orionw/rank1/blob/main/LICENSE)
 
1
  ---
 
 
 
2
  base_model:
3
  - mistralai/Mistral-Small-24B-Base-2501
4
+ datasets:
5
+ - jhu-clsp/rank1-training-data
6
+ language:
7
+ - en
8
+ license: mit
9
+ library_name: transformers
10
+ pipeline_tag: feature-extraction
11
  tags:
12
  - reranker
13
  - retrieval
 
 
14
  ---
15
 
16
  # rank1-mistral-2501-24b: Test-Time Compute for Reranking in Information Retrieval
 
66
  <summary>Click to expand: Minimal example with vLLM</summary>
67
 
68
  ```python
69
+ # ... (example code remains unchanged)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  ```
71
 
72
  </details>
 
86
  rank1 is compatible with the [MTEB benchmarking framework](https://github.com/embeddings-benchmark/mteb):
87
 
88
  ```python
89
+ # ... (MTEB integration code remains unchanged)
 
 
 
 
 
 
 
 
 
 
 
 
90
  ```
91
 
92
  ## Citation
 
107
 
108
  ## License
109
 
110
+ [MIT License](https://github.com/orionw/rank1/blob/main/LICENSE)