Upload Hindi CausalLM model with RoPE

Browse files

Files changed (4)

README.md +2 -132
config.json +1 -0
model.safetensors +2 -2
pytorch_model.bin +2 -2

README.md CHANGED Viewed

@@ -6,6 +6,7 @@ tags:
 - text-generation
 - causal-lm
 - lm
 license: mit
 datasets:
 - custom_hindi_corpus
@@ -14,145 +15,14 @@ datasets:
 # Hindi-CausalLM
 A Hindi language generation model with the following specifications:
-## Usage
-You can use this model with the following code:
-```python
-import torch
-from hindi_embeddings import SentencePieceTokenizerWrapper
-from convaicausallm_model import ConvaiCausalLM, ConvaiCausalLMConfig
-from safetensors.torch import load_file
-import os
-class HindiLLMGenerator:
-    def __init__(self, model_path, device=None):
-        # Set device
-        if device is None:
-            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        else:
-            self.device = torch.device(device)
-        print(f"Using device: {self.device}")
-        # Load tokenizer
-        tokenizer_path = os.path.join(model_path, "tokenizer.model")
-        self.tokenizer = SentencePieceTokenizerWrapper(tokenizer_path)
-        # Load model config
-        config_path = os.path.join(model_path, "config.json")
-        import json
-        with open(config_path, 'r') as f:
-            config_dict = json.load(f)
-        self.config = ConvaiCausalLMConfig(**config_dict)
-        # Load model - try safetensors first, fall back to PyTorch bin if needed
-        safetensors_path = os.path.join(model_path, "model.safetensors")
-        pytorch_path = os.path.join(model_path, "pytorch_model.bin")
-        self.model = ConvaiCausalLM(self.config)
-        # Check which format is available and load accordingly
-        if os.path.exists(safetensors_path):
-            print(f"Loading model from SafeTensors")
-            state_dict = load_file(safetensors_path, device="cpu")
-            self.model.load_state_dict(state_dict)
-        elif os.path.exists(pytorch_path):
-            print(f"Loading model from PyTorch bin")
-            self.model.load_state_dict(torch.load(pytorch_path, map_location="cpu"))
-        # Move model to device and set to evaluation mode
-        self.model.to(self.device)
-        self.model.eval()
-    def generate(self, prompt, max_length=100, temperature=0.8, top_k=50, top_p=0.9,
-                 repetition_penalty=1.1, do_sample=True):
-        # Tokenize the prompt
-        input_ids = self.tokenizer.sp_model.EncodeAsIds(prompt)
-        input_tensor = torch.tensor([input_ids], dtype=torch.long).to(self.device)
-        # Start with the input tensor
-        output_sequence = input_tensor.clone()
-        # Generate tokens one by one
-        for _ in range(max_length - len(input_ids)):
-            with torch.no_grad():
-                # Get the model's output for the current sequence
-                outputs = self.model(output_sequence)
-                next_token_logits = outputs[0, -1, :]
-                # Apply temperature
-                if temperature > 0:
-                    next_token_logits = next_token_logits / temperature
-                # Apply repetition penalty
-                if repetition_penalty > 1.0:
-                    for token_id in output_sequence[0].tolist():
-                        next_token_logits[token_id] /= repetition_penalty
-                # Filter with top-k sampling
-                if top_k > 0:
-                    top_k_values, top_k_indices = torch.topk(next_token_logits, top_k)
-                    next_token_logits = torch.full_like(next_token_logits, float('-inf'))
-                    next_token_logits.scatter_(0, top_k_indices, top_k_values)
-                # Filter with top-p/nucleus sampling
-                if top_p < 1.0 and do_sample:
-                    sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
-                    cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
-                    # Remove tokens with cumulative probability above the threshold
-                    sorted_indices_to_remove = cumulative_probs > top_p
-                    # Shift the indices to the right to keep the first token above the threshold
-                    sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
-                    sorted_indices_to_remove[..., 0] = 0
-                    indices_to_remove = sorted_indices[sorted_indices_to_remove]
-                    next_token_logits[indices_to_remove] = float('-inf')
-                # Sample or choose the next token
-                if do_sample:
-                    probs = torch.softmax(next_token_logits, dim=-1)
-                    next_token = torch.multinomial(probs, num_samples=1)
-                else:
-                    next_token = torch.argmax(next_token_logits, dim=-1).unsqueeze(0)
-                # Add the next token to the sequence
-                output_sequence = torch.cat([output_sequence, next_token.unsqueeze(0)], dim=1)
-                # Check if we've generated an end token
-                if next_token.item() == self.tokenizer.eos_token_id:
-                    break
-        # Decode the generated sequence
-        generated_ids = output_sequence[0].tolist()
-        generated_text = self.tokenizer.sp_model.DecodeIds(generated_ids)
-        return generated_text
-# Example usage
-if __name__ == "__main__":
-    generator = HindiLLMGenerator("path/to/model")
-    result = generator.generate("भारत एक विशाल देश है")
-    print(result)
-```
-## Example Prompts
-Try the model with these example prompts:
-```
-भारत एक विशाल देश है
-मुझे हिंदी में एक कहानी सुनाओ
-आज का मौसम बहुत अच्छा है
-हिंदी साहित्य की प्रमुख विशेषताएं
-```
 ## Model Architecture
 - **Type**: Causal Language Model with Transformer architecture
 - **Hidden size**: 768
 - **Layers**: 12
 - **Attention heads**: 16
 - **Key-value heads**: 4 (using grouped-query attention)
 - **Vocabulary size**: 16000
 - **Parameters**: ~74.1M
 - **Context window**: 512 tokens

 - text-generation
 - causal-lm
 - lm
+- rope
 license: mit
 datasets:
 - custom_hindi_corpus
 # Hindi-CausalLM
 A Hindi language generation model with the following specifications:
 ## Model Architecture
 - **Type**: Causal Language Model with Transformer architecture
 - **Hidden size**: 768
 - **Layers**: 12
 - **Attention heads**: 16
 - **Key-value heads**: 4 (using grouped-query attention)
+- **Position encoding**: Rotary Position Embeddings (RoPE)
 - **Vocabulary size**: 16000
 - **Parameters**: ~74.1M
 - **Context window**: 512 tokens

config.json CHANGED Viewed

@@ -78,6 +78,7 @@
   "intermediate_size": 3072,
   "hidden_act": "silu",
   "max_position_embeddings": 512,
   "model_type": "convaicausallm",
   "auto_map": {
     "AutoConfig": "configuration_convaicausallm.ConvaiCausalLMConfig",

   "intermediate_size": 3072,
   "hidden_act": "silu",
   "max_position_embeddings": 512,
+  "rope_theta": 10000.0,
   "model_type": "convaicausallm",
   "auto_map": {
     "AutoConfig": "configuration_convaicausallm.ConvaiCausalLMConfig",

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0b013bef88c7f7cbf72bd25d7868854da142b00a899adc94175294b50a04d4dd
-size 408609208

 version https://git-lfs.github.com/spec/v1
+oid sha256:2ba8afe67cfd8a9622ba63f0607352cab2fda4a584a712a941cce9e82946c4a4
+size 409791136

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f57c2ac93848af94c4dcbb2f93a6406135c030a2e0c9b717588f4f5929b13551
-size 408661966

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1e3091e78fdc85ef2ed3a1e1bb0e07408327b187fb7bb733f63014a1f6d25a0
+size 409849254