qiaoruiyt committed · verified
Commit 8aa986d · 1 Parent(s): a2d0b84

Update modeling_reasonir_8b.py

Files changed (1):
  1. modeling_reasonir_8b.py (+8, -8)
modeling_reasonir_8b.py CHANGED

@@ -201,8 +201,8 @@ class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
 
 def rotate_half(x):
     """Rotates half the hidden dims of the input."""
-    x1 = x[transformers., : x.shape[-1] // 2]
-    x2 = x[transformers., x.shape[-1] // 2 :]
+    x1 = x[..., : x.shape[-1] // 2]
+    x2 = x[..., x.shape[-1] // 2 :]
     return torch.cat((-x2, x1), dim=-1)
 
 
@@ -1313,9 +1313,9 @@ class LlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin):
         r"""
         Args:
             labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
-                Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers.,
+                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
                 config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
-                (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`.
+                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
 
             num_logits_to_keep (`int`, *optional*):
                 Calculate logits for the last `num_logits_to_keep` tokens. If `0`, calculate logits for all
@@ -1433,7 +1433,7 @@ class LlamaForSequenceClassification(LlamaPreTrainedModel):
     ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-            Labels for computing the sequence classification/regression loss. Indices should be in `[0, transformers.,
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
@@ -1500,7 +1500,7 @@ SQuAD (a linear layer on top of the hidden-states output to compute `span start
 class LlamaForQuestionAnswering(LlamaPreTrainedModel):
     base_model_prefix = "transformer"
 
-    # Copied from transformers.models.bloom.modeling_bloom.BloomForQuestionAnswering.__init__ with Bloom->Llama
+    # Copied from ...models.bloom.modeling_bloom.BloomForQuestionAnswering.__init__ with Bloom->Llama
     def __init__(self, config):
         super().__init__(config)
         self.transformer = LlamaModel(config)
@@ -1628,7 +1628,7 @@ class LlamaForTokenClassification(LlamaPreTrainedModel):
     ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-            Labels for computing the sequence classification/regression loss. Indices should be in `[0, transformers.,
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
@@ -1662,4 +1662,4 @@ class LlamaForTokenClassification(LlamaPreTrainedModel):
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-        )
+        )
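For context, a minimal, self-contained sketch of what the restored `...` (Ellipsis) slicing in `rotate_half` does: it splits the last (head) dimension in half no matter how many leading batch/head/sequence dimensions the tensor has, whereas the broken `x[transformers., ...]` form is a syntax error. The tensor and shapes below are illustrative only, not taken from the repository.

import torch

def rotate_half(x):
    """Rotates half the hidden dims of the input (as in the patched file)."""
    x1 = x[..., : x.shape[-1] // 2]   # first half of the last dimension
    x2 = x[..., x.shape[-1] // 2 :]   # second half of the last dimension
    return torch.cat((-x2, x1), dim=-1)

q = torch.arange(8.0).reshape(1, 1, 2, 4)   # (batch, heads, seq_len, head_dim)
print(rotate_half(q))
# tensor([[[[-2., -3.,  0.,  1.],
#           [-6., -7.,  4.,  5.]]]])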
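The docstring hunks above refer to the `-100` ignore index for `labels`. As a hedged sketch of that convention (shapes and values are made up for illustration, not code from this repository): positions labeled `-100` contribute nothing to the cross-entropy loss.

import torch
import torch.nn.functional as F

vocab_size = 10
logits = torch.randn(1, 4, vocab_size)       # (batch, seq_len, vocab_size)
labels = torch.tensor([[3, 7, -100, -100]])  # last two positions are ignored

# ignore_index=-100 matches the convention the docstring describes.
loss = F.cross_entropy(logits.view(-1, vocab_size), labels.view(-1), ignore_index=-100)
print(loss)  # loss is computed only over the first two positions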