Description
mistralai/Mistral-7B-v0.1
model fine-tuned over 52k alpaca dataset
How to use it
# pip install transformers==4.35.2
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from transformers import pipeline
model_id="MaziyarPanahi/Mistral-7B-Alpaca-52k-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
streamer = TextStreamer(tokenizer)
model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.float16,
device_map="auto",
)
pipe = pipeline(
"text-generation",
model=model,
tokenizer=tokenizer,
max_new_tokens=1024,
temperature=0.1,
do_sample=True,
top_p=0.95,
repetition_penalty=1.15,
return_full_text=False,
streamer=streamer
)
prompt = """Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
describe about pros and cons of docker system. Answer in bullet point
### Response:
"""
res = pipe(prompt)[0]['generated_text']
Results:
Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
describe about pros and cons of docker system. Answer in bullet point
### Response:
Pros of Docker System:
- Improved portability - Docker containers can be easily moved between different environments, making it easier to deploy applications across multiple platforms.
- Increased security - Containers are isolated from each other, which helps prevent malicious code from spreading throughout the system.
- Better resource utilization - Containers allow for better resource management by allowing users to run multiple applications on a single host without having to worry about conflicts or performance issues.
Cons of Docker System:
- Learning curve - It takes time to learn how to use Docker effectively, as there are many commands and concepts involved.
- Limited customization options - While Docker provides some basic configuration options, more advanced features such as network routing require additional tools.
- Performance overhead - Running multiple containers on a single host may result in slower performance due to increased memory usage.</s>
Eval
{
"all": {
"acc": 0.6309850839451187,
"acc_stderr": 0.032333688535613636,
"acc_norm": 0.6368691004374645,
"acc_norm_stderr": 0.03298401757997533,
"mc1": 0.29008567931456547,
"mc1_stderr": 0.01588623687420952,
"mc2": 0.41501661742948026,
"mc2_stderr": 0.014285902986671931
},
"harness|arc:challenge|25": {
"acc": 0.5750853242320819,
"acc_stderr": 0.014445698968520767,
"acc_norm": 0.6092150170648464,
"acc_norm_stderr": 0.01425856388051378
},
"harness|hellaswag|10": {
"acc": 0.6221868153754232,
"acc_stderr": 0.0048384969668239025,
"acc_norm": 0.8212507468631747,
"acc_norm_stderr": 0.0038235918141330347
},
"harness|hendrycksTest-abstract_algebra|5": {
"acc": 0.32,
"acc_stderr": 0.046882617226215034,
"acc_norm": 0.32,
"acc_norm_stderr": 0.046882617226215034
},
"harness|hendrycksTest-anatomy|5": {
"acc": 0.6,
"acc_stderr": 0.04232073695151589,
"acc_norm": 0.6,
"acc_norm_stderr": 0.04232073695151589
},
"harness|hendrycksTest-astronomy|5": {
"acc": 0.6447368421052632,
"acc_stderr": 0.038947344870133176,
"acc_norm": 0.6447368421052632,
"acc_norm_stderr": 0.038947344870133176
},
"harness|hendrycksTest-business_ethics|5": {
"acc": 0.57,
"acc_stderr": 0.04975698519562428,
"acc_norm": 0.57,
"acc_norm_stderr": 0.04975698519562428
},
"harness|hendrycksTest-clinical_knowledge|5": {
"acc": 0.6792452830188679,
"acc_stderr": 0.02872750295788027,
"acc_norm": 0.6792452830188679,
"acc_norm_stderr": 0.02872750295788027
},
"harness|hendrycksTest-college_biology|5": {
"acc": 0.7430555555555556,
"acc_stderr": 0.03653946969442099,
"acc_norm": 0.7430555555555556,
"acc_norm_stderr": 0.03653946969442099
},
"harness|hendrycksTest-college_chemistry|5": {
"acc": 0.49,
"acc_stderr": 0.05024183937956912,
"acc_norm": 0.49,
"acc_norm_stderr": 0.05024183937956912
},
"harness|hendrycksTest-college_computer_science|5": {
"acc": 0.56,
"acc_stderr": 0.04988876515698589,
"acc_norm": 0.56,
"acc_norm_stderr": 0.04988876515698589
},
"harness|hendrycksTest-college_mathematics|5": {
"acc": 0.36,
"acc_stderr": 0.048241815132442176,
"acc_norm": 0.36,
"acc_norm_stderr": 0.048241815132442176
},
"harness|hendrycksTest-college_medicine|5": {
"acc": 0.653179190751445,
"acc_stderr": 0.036291466701596636,
"acc_norm": 0.653179190751445,
"acc_norm_stderr": 0.036291466701596636
},
"harness|hendrycksTest-college_physics|5": {
"acc": 0.4019607843137255,
"acc_stderr": 0.048786087144669955,
"acc_norm": 0.4019607843137255,
"acc_norm_stderr": 0.048786087144669955
},
"harness|hendrycksTest-computer_security|5": {
"acc": 0.79,
"acc_stderr": 0.04093601807403326,
"acc_norm": 0.79,
"acc_norm_stderr": 0.04093601807403326
},
"harness|hendrycksTest-conceptual_physics|5": {
"acc": 0.5702127659574469,
"acc_stderr": 0.03236214467715564,
"acc_norm": 0.5702127659574469,
"acc_norm_stderr": 0.03236214467715564
},
"harness|hendrycksTest-econometrics|5": {
"acc": 0.49122807017543857,
"acc_stderr": 0.047028804320496165,
"acc_norm": 0.49122807017543857,
"acc_norm_stderr": 0.047028804320496165
},
"harness|hendrycksTest-electrical_engineering|5": {
"acc": 0.5862068965517241,
"acc_stderr": 0.04104269211806232,
"acc_norm": 0.5862068965517241,
"acc_norm_stderr": 0.04104269211806232
},
"harness|hendrycksTest-elementary_mathematics|5": {
"acc": 0.3915343915343915,
"acc_stderr": 0.025138091388851116,
"acc_norm": 0.3915343915343915,
"acc_norm_stderr": 0.025138091388851116
},
"harness|hendrycksTest-formal_logic|5": {
"acc": 0.4444444444444444,
"acc_stderr": 0.04444444444444449,
"acc_norm": 0.4444444444444444,
"acc_norm_stderr": 0.04444444444444449
},
"harness|hendrycksTest-global_facts|5": {
"acc": 0.32,
"acc_stderr": 0.04688261722621504,
"acc_norm": 0.32,
"acc_norm_stderr": 0.04688261722621504
},
"harness|hendrycksTest-high_school_biology|5": {
"acc": 0.7419354838709677,
"acc_stderr": 0.02489246917246283,
"acc_norm": 0.7419354838709677,
"acc_norm_stderr": 0.02489246917246283
},
"harness|hendrycksTest-high_school_chemistry|5": {
"acc": 0.5024630541871922,
"acc_stderr": 0.035179450386910616,
"acc_norm": 0.5024630541871922,
"acc_norm_stderr": 0.035179450386910616
},
"harness|hendrycksTest-high_school_computer_science|5": {
"acc": 0.67,
"acc_stderr": 0.047258156262526066,
"acc_norm": 0.67,
"acc_norm_stderr": 0.047258156262526066
},
"harness|hendrycksTest-high_school_european_history|5": {
"acc": 0.7575757575757576,
"acc_stderr": 0.03346409881055953,
"acc_norm": 0.7575757575757576,
"acc_norm_stderr": 0.03346409881055953
},
"harness|hendrycksTest-high_school_geography|5": {
"acc": 0.7929292929292929,
"acc_stderr": 0.028869778460267042,
"acc_norm": 0.7929292929292929,
"acc_norm_stderr": 0.028869778460267042
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"acc": 0.8601036269430051,
"acc_stderr": 0.025033870583015184,
"acc_norm": 0.8601036269430051,
"acc_norm_stderr": 0.025033870583015184
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"acc": 0.6358974358974359,
"acc_stderr": 0.024396672985094764,
"acc_norm": 0.6358974358974359,
"acc_norm_stderr": 0.024396672985094764
},
"harness|hendrycksTest-high_school_mathematics|5": {
"acc": 0.362962962962963,
"acc_stderr": 0.029318203645206865,
"acc_norm": 0.362962962962963,
"acc_norm_stderr": 0.029318203645206865
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"acc": 0.6218487394957983,
"acc_stderr": 0.03149930577784906,
"acc_norm": 0.6218487394957983,
"acc_norm_stderr": 0.03149930577784906
},
"harness|hendrycksTest-high_school_physics|5": {
"acc": 0.32450331125827814,
"acc_stderr": 0.038227469376587525,
"acc_norm": 0.32450331125827814,
"acc_norm_stderr": 0.038227469376587525
},
"harness|hendrycksTest-high_school_psychology|5": {
"acc": 0.8146788990825689,
"acc_stderr": 0.016659279700295838,
"acc_norm": 0.8146788990825689,
"acc_norm_stderr": 0.016659279700295838
},
"harness|hendrycksTest-high_school_statistics|5": {
"acc": 0.49537037037037035,
"acc_stderr": 0.03409825519163572,
"acc_norm": 0.49537037037037035,
"acc_norm_stderr": 0.03409825519163572
},
"harness|hendrycksTest-high_school_us_history|5": {
"acc": 0.7892156862745098,
"acc_stderr": 0.028626547912437406,
"acc_norm": 0.7892156862745098,
"acc_norm_stderr": 0.028626547912437406
},
"harness|hendrycksTest-high_school_world_history|5": {
"acc": 0.7552742616033755,
"acc_stderr": 0.027985699387036423,
"acc_norm": 0.7552742616033755,
"acc_norm_stderr": 0.027985699387036423
},
"harness|hendrycksTest-human_aging|5": {
"acc": 0.6636771300448431,
"acc_stderr": 0.031708824268455,
"acc_norm": 0.6636771300448431,
"acc_norm_stderr": 0.031708824268455
},
"harness|hendrycksTest-human_sexuality|5": {
"acc": 0.7862595419847328,
"acc_stderr": 0.0359546161177469,
"acc_norm": 0.7862595419847328,
"acc_norm_stderr": 0.0359546161177469
},
"harness|hendrycksTest-international_law|5": {
"acc": 0.7933884297520661,
"acc_stderr": 0.03695980128098824,
"acc_norm": 0.7933884297520661,
"acc_norm_stderr": 0.03695980128098824
},
"harness|hendrycksTest-jurisprudence|5": {
"acc": 0.7592592592592593,
"acc_stderr": 0.04133119440243838,
"acc_norm": 0.7592592592592593,
"acc_norm_stderr": 0.04133119440243838
},
"harness|hendrycksTest-logical_fallacies|5": {
"acc": 0.803680981595092,
"acc_stderr": 0.031207970394709218,
"acc_norm": 0.803680981595092,
"acc_norm_stderr": 0.031207970394709218
},
"harness|hendrycksTest-machine_learning|5": {
"acc": 0.5178571428571429,
"acc_stderr": 0.047427623612430116,
"acc_norm": 0.5178571428571429,
"acc_norm_stderr": 0.047427623612430116
},
"harness|hendrycksTest-management|5": {
"acc": 0.8252427184466019,
"acc_stderr": 0.03760178006026621,
"acc_norm": 0.8252427184466019,
"acc_norm_stderr": 0.03760178006026621
},
"harness|hendrycksTest-marketing|5": {
"acc": 0.8632478632478633,
"acc_stderr": 0.022509033937077816,
"acc_norm": 0.8632478632478633,
"acc_norm_stderr": 0.022509033937077816
},
"harness|hendrycksTest-medical_genetics|5": {
"acc": 0.74,
"acc_stderr": 0.04408440022768078,
"acc_norm": 0.74,
"acc_norm_stderr": 0.04408440022768078
},
"harness|hendrycksTest-miscellaneous|5": {
"acc": 0.8173690932311622,
"acc_stderr": 0.013816335389973136,
"acc_norm": 0.8173690932311622,
"acc_norm_stderr": 0.013816335389973136
},
"harness|hendrycksTest-moral_disputes|5": {
"acc": 0.7023121387283237,
"acc_stderr": 0.024617055388677,
"acc_norm": 0.7023121387283237,
"acc_norm_stderr": 0.024617055388677
},
"harness|hendrycksTest-moral_scenarios|5": {
"acc": 0.2335195530726257,
"acc_stderr": 0.014149575348976269,
"acc_norm": 0.2335195530726257,
"acc_norm_stderr": 0.014149575348976269
},
"harness|hendrycksTest-nutrition|5": {
"acc": 0.7450980392156863,
"acc_stderr": 0.024954184324879905,
"acc_norm": 0.7450980392156863,
"acc_norm_stderr": 0.024954184324879905
},
"harness|hendrycksTest-philosophy|5": {
"acc": 0.7106109324758842,
"acc_stderr": 0.025755865922632945,
"acc_norm": 0.7106109324758842,
"acc_norm_stderr": 0.025755865922632945
},
"harness|hendrycksTest-prehistory|5": {
"acc": 0.7191358024691358,
"acc_stderr": 0.025006469755799215,
"acc_norm": 0.7191358024691358,
"acc_norm_stderr": 0.025006469755799215
},
"harness|hendrycksTest-professional_accounting|5": {
"acc": 0.4716312056737589,
"acc_stderr": 0.029779450957303062,
"acc_norm": 0.4716312056737589,
"acc_norm_stderr": 0.029779450957303062
},
"harness|hendrycksTest-professional_law|5": {
"acc": 0.4498044328552803,
"acc_stderr": 0.012705721498565107,
"acc_norm": 0.4498044328552803,
"acc_norm_stderr": 0.012705721498565107
},
"harness|hendrycksTest-professional_medicine|5": {
"acc": 0.6580882352941176,
"acc_stderr": 0.02881472242225418,
"acc_norm": 0.6580882352941176,
"acc_norm_stderr": 0.02881472242225418
},
"harness|hendrycksTest-professional_psychology|5": {
"acc": 0.6519607843137255,
"acc_stderr": 0.019270998708223974,
"acc_norm": 0.6519607843137255,
"acc_norm_stderr": 0.019270998708223974
},
"harness|hendrycksTest-public_relations|5": {
"acc": 0.6636363636363637,
"acc_stderr": 0.04525393596302506,
"acc_norm": 0.6636363636363637,
"acc_norm_stderr": 0.04525393596302506
},
"harness|hendrycksTest-security_studies|5": {
"acc": 0.7224489795918367,
"acc_stderr": 0.028666857790274645,
"acc_norm": 0.7224489795918367,
"acc_norm_stderr": 0.028666857790274645
},
"harness|hendrycksTest-sociology|5": {
"acc": 0.8557213930348259,
"acc_stderr": 0.02484575321230604,
"acc_norm": 0.8557213930348259,
"acc_norm_stderr": 0.02484575321230604
},
"harness|hendrycksTest-us_foreign_policy|5": {
"acc": 0.86,
"acc_stderr": 0.03487350880197771,
"acc_norm": 0.86,
"acc_norm_stderr": 0.03487350880197771
},
"harness|hendrycksTest-virology|5": {
"acc": 0.5481927710843374,
"acc_stderr": 0.03874371556587953,
"acc_norm": 0.5481927710843374,
"acc_norm_stderr": 0.03874371556587953
},
"harness|hendrycksTest-world_religions|5": {
"acc": 0.8421052631578947,
"acc_stderr": 0.027966785859160896,
"acc_norm": 0.8421052631578947,
"acc_norm_stderr": 0.027966785859160896
},
"harness|truthfulqa:mc|0": {
"mc1": 0.29008567931456547,
"mc1_stderr": 0.01588623687420952,
"mc2": 0.41501661742948026,
"mc2_stderr": 0.014285902986671931
},
"harness|winogrande|5": {
"acc": 0.7734806629834254,
"acc_stderr": 0.011764149054698332
},
"harness|gsm8k|5": {
"acc": 0.37452615617892343,
"acc_stderr": 0.013331774158491393
}
}
Open LLM Leaderboard Evaluation Results
Detailed results can be found here
Metric | Value |
---|---|
Avg. | 60.46 |
AI2 Reasoning Challenge (25-Shot) | 60.92 |
HellaSwag (10-Shot) | 82.13 |
MMLU (5-Shot) | 63.41 |
TruthfulQA (0-shot) | 41.50 |
Winogrande (5-shot) | 77.35 |
GSM8k (5-shot) | 37.45 |
- Downloads last month
- 24
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social
visibility and check back later, or deploy to Inference Endpoints (dedicated)
instead.
Model tree for MaziyarPanahi/Mistral-7B-Alpaca-52k-v0.1
Dataset used to train MaziyarPanahi/Mistral-7B-Alpaca-52k-v0.1
Evaluation results
- normalized accuracy on AI2 Reasoning Challenge (25-Shot)test set Open LLM Leaderboard60.920
- normalized accuracy on HellaSwag (10-Shot)validation set Open LLM Leaderboard82.130
- accuracy on MMLU (5-Shot)test set Open LLM Leaderboard63.410
- mc2 on TruthfulQA (0-shot)validation set Open LLM Leaderboard41.500
- accuracy on Winogrande (5-shot)validation set Open LLM Leaderboard77.350
- accuracy on GSM8k (5-shot)test set Open LLM Leaderboard37.450