dotwee committed on
Commit
4e6f878
1 Parent(s): 3bff1ae
LLaMmlein_1B.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:019692f01dcff4379e0af56302a9461eb9f266653c9796992f308efa0210ff9f
3
+ size 402502
LLaMmlein_1B.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:428840e541560e11a46f49be92b978b8c344fd873d9633b39f93a5678842604c
3
+ size 2200193792
LLaMmlein_1B.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "66E1E9FD-372C-4412-928B-F3B84A23D34B": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "C4DCB504-21DC-4A96-8097-D4B4EBAED8F5": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "66E1E9FD-372C-4412-928B-F3B84A23D34B"
18
+ }
README.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - togethercomputer/RedPajama-Data-V2
4
+ language:
5
+ - de
6
+ pipeline_tag: text-generation
7
+ library_name: coremltools
8
+ license: other
9
+ tags:
10
+ - coreml
11
+ - tinyllama
12
+ - german-language-model
13
+ ---
14
+
15
+ # LLäMmlein 1B CoreML
16
+
17
+ This repository contains the CoreML version of [LLäMmlein 1B](https://huggingface.co/LSX-UniWue/LLaMmlein_1B), a German language model trained from scratch using the [TinyLlama](https://github.com/jzhang38/TinyLlama) codebase on the German portion of [RedPajama V2](https://huggingface.co/datasets/togethercomputer/RedPajama-Data-V2).
18
+
19
+ ## Model Details
20
+
21
+ - **Model Type:** German language model based on the TinyLlama architecture
22
+ - **Language:** German
23
+ - **Framework:** CoreML
24
+ - **Original Model:** [LSX-UniWue/LLaMmlein_1B](https://huggingface.co/LSX-UniWue/LLaMmlein_1B)
25
+ - **Size:** 1B parameters
26
+ - **Format:** CoreML (.mlpackage)
27
+ - **Minimum Deployment Target:** iOS 16
28
+ - **Compute Units:** ALL (CPU + GPU + Neural Engine)
29
+ - **Input Sequence Length:** 512 tokens
30
+
31
+ ## Conversion Process
32
+
33
+ The model was converted from PyTorch to CoreML using the following steps:
34
+
35
+ ```python
36
+ import torch
37
+ import numpy as np
38
+ from transformers import AutoModelForCausalLM, AutoTokenizer
39
+ import coremltools as ct
40
+
41
+ # Load the pretrained model and its tokenizer
42
+ model = AutoModelForCausalLM.from_pretrained("LSX-UniWue/LLaMmlein_1B")
43
+ tokenizer = AutoTokenizer.from_pretrained("LSX-UniWue/LLaMmlein_1B")
44
+
45
+ # Set model to eval mode
46
+ model.eval()
47
+
48
+ # Create example input
49
+ text = "Ein Beispieltext"
50
+ inputs = tokenizer(text, return_tensors="pt")
51
+
52
+ # Create a wrapper class for tracing
53
+ class ModelWrapper(torch.nn.Module):
54
+ def __init__(self, model):
55
+ super().__init__()
56
+ self.model = model
57
+
58
+ def forward(self, input_ids):
59
+ return self.model(input_ids).logits
60
+
61
+ # Wrap and trace model
62
+ wrapped_model = ModelWrapper(model)
63
+ traced_model = torch.jit.trace(wrapped_model, inputs.input_ids)
64
+
65
+ # Convert to CoreML
66
+ model_mlpackage = ct.convert(
67
+ traced_model,
68
+ inputs=[
69
+ ct.TensorType(
70
+ name="input_ids",
71
+ shape=inputs.input_ids.shape,
72
+ dtype=np.int32
73
+ )
74
+ ],
75
+ source="pytorch",
76
+ minimum_deployment_target=ct.target.iOS16,
77
+ convert_to="mlprogram",
78
+ compute_precision=ct.precision.FLOAT16,
79
+ compute_units=ct.ComputeUnit.ALL,
80
+ )
81
+
82
+ model_mlpackage.save("LLaMmlein_1B.mlpackage")
83
+ ```
84
+
85
+ ## Usage
86
+
87
+ To use this model on Apple devices:
88
+
89
+ ```swift
90
+ import CoreML
91
+
92
+ // Load the model
93
+ let config = MLModelConfiguration()
94
+ let model = try LLaMmlein_1B(configuration: config)
95
+
96
+ // Prepare input
97
+ let inputIds = // Your tokenized input as [Int32]
98
+
99
+ // Make prediction
100
+ let prediction = try model.prediction(input_ids: inputIds)
101
+ ```
102
+
103
+ ## Performance Considerations
104
+
105
+ - The model is optimized for the Apple Neural Engine
106
+ - Recommended for iOS 16+ devices
107
+ - Best performance achieved with batch size of 1
108
+ - Maximum sequence length is set to 512 tokens
109
+
110
+ ## Original Model Information
111
+
112
+ The original model was trained on the German portion of RedPajama V2. For more details about the base model:
113
+ - Visit the [project page](https://www.informatik.uni-wuerzburg.de/datascience/projects/nlp/llammlein/)
114
+ - Read the [research paper](https://arxiv.org/abs/2411.11171)
115
+ - Check the [SuperGLEBer benchmark](https://lsx-uniwue.github.io/SuperGLEBer-site/) for evaluation results
116
+
117
+ ## License
118
+
119
+ This model inherits its license from the original LLäMmlein 1B model.
120
+
121
+ ## Citation
122
+
123
+ If you use this model, please cite the original work:
124
+
125
+ ```bibtex
126
+ @misc{llammlein2024,
127
+ title={LLäMmlein: A German Language Model},
128
+ author={LSX-UniWue},
129
+ year={2024},
130
+ publisher={Hugging Face},
131
+ journal={Hugging Face Hub},
132
+ howpublished={\url{https://huggingface.co/LSX-UniWue/LLaMmlein_1B}},
133
+ }
134
+ ```
135
+
136
+ For the original model description and evaluation results, see the [original model card](https://huggingface.co/LSX-UniWue/LLaMmlein_1B).
convert_model.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+ import coremltools as ct
5
+
6
+ # Load the pretrained model and its tokenizer
7
+ model = AutoModelForCausalLM.from_pretrained("LSX-UniWue/LLaMmlein_1B")
8
+ tokenizer = AutoTokenizer.from_pretrained("LSX-UniWue/LLaMmlein_1B")
9
+
10
+ # Set model to eval mode
11
+ model.eval()
12
+
13
+ # Create example input
14
+ text = "Ein Beispieltext"
15
+ inputs = tokenizer(text, return_tensors="pt")
16
+
17
+ # Create a wrapper class for tracing
18
+ class ModelWrapper(torch.nn.Module):
19
+ def __init__(self, model):
20
+ super().__init__()
21
+ self.model = model
22
+
23
+ def forward(self, input_ids):
24
+ return self.model(input_ids).logits
25
+
26
+ # Wrap and trace model
27
+ wrapped_model = ModelWrapper(model)
28
+ traced_model = torch.jit.trace(wrapped_model, inputs.input_ids)
29
+
30
+ # Convert to CoreML
31
+ model_mlpackage = ct.convert(
32
+ traced_model,
33
+ inputs=[
34
+ ct.TensorType(
35
+ name="input_ids",
36
+ shape=inputs.input_ids.shape,
37
+ dtype=np.int32
38
+ )
39
+ ],
40
+ source="pytorch",
41
+ minimum_deployment_target=ct.target.iOS16,
42
+ convert_to="mlprogram",
43
+ compute_precision=ct.precision.FLOAT16,
44
+ compute_units=ct.ComputeUnit.ALL,
45
+ )
46
+
47
+ model_mlpackage.save("LLaMmlein_1B.mlpackage")