qiaoruiyt committed
Commit 103ffaa · verified · 1 Parent(s): ac5b8b6

Update modeling_reasonir_8b.py

Files changed (1)
  1. modeling_reasonir_8b.py +17 -17
modeling_reasonir_8b.py CHANGED
@@ -26,22 +26,22 @@ import torch.nn.functional as F
 import torch.utils.checkpoint
 from torch import nn
 
-from ...activations import ACT2FN
-from ...cache_utils import Cache, DynamicCache, StaticCache
-from ...modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa, _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
-from ...generation import GenerationMixin
-from ...modeling_attn_mask_utils import AttentionMaskConverter
-from ...modeling_outputs import (
+from transformers.activations import ACT2FN
+from transformers.cache_utils import Cache, DynamicCache, StaticCache
+from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask, _prepare_4d_causal_attention_mask_for_sdpa, _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
+from transformers.generation import GenerationMixin
+from transformers.modeling_attn_mask_utils import AttentionMaskConverter
+from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     CausalLMOutputWithPast,
     QuestionAnsweringModelOutput,
     SequenceClassifierOutputWithPast,
     TokenClassifierOutput,
 )
-from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS
-from ...modeling_utils import PreTrainedModel
-from ...pytorch_utils import ALL_LAYERNORM_LAYERS
-from ...utils import (
+from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS
+from transformers.modeling_utils import PreTrainedModel
+from transformers.pytorch_utils import ALL_LAYERNORM_LAYERS
+from transformers.utils import (
     add_code_sample_docstrings,
     add_start_docstrings,
     add_start_docstrings_to_model_forward,
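
The switch from package-relative imports (`from ...activations`) to absolute ones (`from transformers.activations`) is what lets this file run as standalone remote code: relative imports only resolve when the module lives inside the transformers package itself. A minimal loading sketch, assuming the checkpoint id reasonir/ReasonIR-8B and that executing repository code is acceptable:

from transformers import AutoModel, AutoTokenizer

# trust_remote_code=True executes modeling_reasonir_8b.py from the model repo,
# which is why its imports must be absolute rather than package-relative.
tokenizer = AutoTokenizer.from_pretrained("reasonir/ReasonIR-8B")
model = AutoModel.from_pretrained(
    "reasonir/ReasonIR-8B",
    torch_dtype="auto",
    trust_remote_code=True,
)
model.eval()
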
@@ -201,8 +201,8 @@ class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
 
 def rotate_half(x):
     """Rotates half the hidden dims of the input."""
-    x1 = x[..., : x.shape[-1] // 2]
-    x2 = x[..., x.shape[-1] // 2 :]
+    x1 = x[transformers., : x.shape[-1] // 2]
+    x2 = x[transformers., x.shape[-1] // 2 :]
     return torch.cat((-x2, x1), dim=-1)
 
 
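
Note that the added lines here, `x1 = x[transformers., : x.shape[-1] // 2]`, are not valid Python: the Ellipsis (`...`) used for tensor slicing was caught by the same `...` → `transformers.` replacement as the imports. The removed lines are the standard RoPE helper; a self-contained sketch of what they compute:

import torch

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    """Rotates half the hidden dims of the input (RoPE helper)."""
    x1 = x[..., : x.shape[-1] // 2]  # first half of the last dimension
    x2 = x[..., x.shape[-1] // 2 :]  # second half of the last dimension
    return torch.cat((-x2, x1), dim=-1)

# rotate_half(torch.tensor([1., 2., 3., 4.])) -> tensor([-3., -4., 1., 2.])
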
 
@@ -1313,9 +1313,9 @@ class LlamaForCausalLM(LlamaPreTrainedModel, GenerationMixin):
         r"""
         Args:
             labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
-                Labels for computing the masked language modeling loss. Indices should either be in `[0, ...,
+                Labels for computing the masked language modeling loss. Indices should either be in `[0, transformers.,
                 config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
-                (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
+                (masked), the loss is only computed for the tokens with labels in `[0, transformers., config.vocab_size]`.
 
             num_logits_to_keep (`int`, *optional*):
                 Calculate logits for the last `num_logits_to_keep` tokens. If `0`, calculate logits for all
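
As in the previous hunk, the added docstring lines picked up the `...` → `transformers.` replacement; the ranges read `[0, ..., config.vocab_size]` in the original. Separately, the `-100` sentinel in this docstring is PyTorch's default `ignore_index` for cross-entropy: positions labelled -100 (padding, or prompt tokens you do not want to train on) contribute nothing to the loss. A minimal sketch with made-up shapes:

import torch
import torch.nn.functional as F

vocab_size = 8
logits = torch.randn(1, 5, vocab_size)          # (batch, seq_len, vocab)
labels = torch.tensor([[2, 7, -100, -100, 3]])  # masked positions set to -100

# Shift so that tokens < n predict token n, as in causal LM training.
shift_logits = logits[:, :-1, :].reshape(-1, vocab_size)
shift_labels = labels[:, 1:].reshape(-1)
loss = F.cross_entropy(shift_logits, shift_labels, ignore_index=-100)
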
@@ -1433,7 +1433,7 @@ class LlamaForSequenceClassification(LlamaPreTrainedModel):
     ) -> Union[Tuple, SequenceClassifierOutputWithPast]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, transformers.,
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
@@ -1628,7 +1628,7 @@ class LlamaForTokenClassification(LlamaPreTrainedModel):
     ) -> Union[Tuple, TokenClassifierOutput]:
         r"""
         labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
-            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
+            Labels for computing the sequence classification/regression loss. Indices should be in `[0, transformers.,
             config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
             `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
         """
@@ -1662,4 +1662,4 @@ class LlamaForTokenClassification(LlamaPreTrainedModel):
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-        )
+        )
 