metadata
license: apache-2.0
dataset: sft
tags:
- finetuned
- multimodal
inference: false
These are weights for a version of checkpoints/stage2/llava-moleculestm-vicuna-7b-v1.5-pretrain_rxn_nc
finetuned for multimodal applications.
Modalities
- Molecule2DModality (use
<molecule_2d>
in text and providemolecules
Usage
GitHub: https://github.com/sshh12/bioagent (includes training scripts and basic inference server)
Dataset
sft (765299 examples)
{'molecule_2d': [(tensor([[34, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 6, 0],
[ 5, 0]]), tensor([[0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 1],
[1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 1, 6]]), tensor([[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0]])), (tensor([[5, 0],
[5, 0],
[5, 0],
[5, 0],
[5, 0],
[6, 0],
[5, 0],
[5, 0]]), tensor([[0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 4, 6, 6, 7, 7, 1],
[1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 4, 7, 6, 1, 7]]), tensor([[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0]])), (tensor([[ 7, 0],
[15, 0],
[ 7, 0],
[ 7, 0],
[45, 0],
[ 6, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 8, 0],
[ 8, 0],
[ 8, 0]]), tensor([[ 0, 1, 1, 2, 1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 1, 18,
18, 19, 18, 20, 18, 21, 17, 4, 11, 6, 17, 12],
[ 1, 0, 2, 1, 3, 1, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8,
10, 9, 11, 10, 12, 11, 13, 12, 14, 13, 15, 14, 16, 15, 17, 16, 18, 1,
19, 18, 20, 18, 21, 18, 4, 17, 6, 11, 12, 17]]), tensor([[1, 0],
[1, 0],
[1, 0],
[1, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0]])), (tensor([[ 5, 0],
[ 7, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 7, 0],
[ 5, 0],
[ 5, 0],
[14, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0],
[ 5, 0]]), tensor([[ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 5, 8, 8, 9,
9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 16, 18,
14, 19, 9, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27,
26, 28, 24, 29, 8, 30, 30, 31, 31, 32, 32, 33, 33, 34, 33, 35, 32, 36,
36, 37, 37, 38, 38, 39, 38, 40, 37, 41, 41, 42, 42, 43, 43, 44, 43, 45,
30, 2, 42, 31, 18, 10, 28, 20, 19, 10, 29, 20, 17, 12, 27, 22],
[ 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 5, 9, 8,
10, 9, 11, 10, 12, 11, 13, 12, 14, 13, 15, 14, 16, 15, 17, 16, 18, 16,
19, 14, 20, 9, 21, 20, 22, 21, 23, 22, 24, 23, 25, 24, 26, 25, 27, 26,
28, 26, 29, 24, 30, 8, 31, 30, 32, 31, 33, 32, 34, 33, 35, 33, 36, 32,
37, 36, 38, 37, 39, 38, 40, 38, 41, 37, 42, 41, 43, 42, 44, 43, 45, 43,
2, 30, 31, 42, 10, 18, 20, 28, 10, 19, 20, 29, 12, 17, 22, 27]]), tensor([[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]])), (tensor([[ 5, 0],
[ 5, 0],
[ 6, 0],
[14, 0],
[ 6, 0],
[14, 0],
[ 6, 0],
[ 5, 0],
[ 5, 0],
[ 6, 0],
[ 5, 0],
[ 5, 0],
[ 6, 0],
[ 5, 0],
[ 5, 0],
[ 6, 0],
[ 5, 0],
[ 5, 0],
[ 6, 0],
[ 5, 0],
[ 5, 0]]), tensor([[ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 6, 8, 5, 9,
9, 10, 9, 11, 5, 12, 12, 13, 12, 14, 3, 15, 15, 16, 15, 17, 3, 18,
18, 19, 18, 20],
[ 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 6, 9, 5,
10, 9, 11, 9, 12, 5, 13, 12, 14, 12, 15, 3, 16, 15, 17, 15, 18, 3,
19, 18, 20, 18]]), tensor([[0, 0],
[0, 0],
[0, 0],
[0, 0],
[1, 0],
[1, 0],
[0, 0],
[0, 0],
[1, 0],
[1, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]])), (tensor([[5, 0],
[5, 0],
[5, 0],
[5, 0],
[6, 0],
[7, 0]]), tensor([[0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1],
[1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 1, 5]]), tensor([[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0]])), (tensor([[5, 0],
[5, 0],
[5, 0],
[5, 0],
[5, 0],
[6, 0],
[5, 0],
[5, 0],
[5, 0],
[5, 0],
[6, 0],
[5, 0],
[5, 0],
[5, 0]]), tensor([[ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9,
9, 10, 10, 11, 4, 12, 12, 13, 13, 1, 11, 6],
[ 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8,
10, 9, 11, 10, 12, 4, 13, 12, 1, 13, 6, 11]]), tensor([[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0],
[3, 0]]))], 'input_ids': tensor([ 1, 518, 25580, 29962, 3532, 14816, 29903, 6778, 13, 3492,
526, 263, 8950, 391, 29889, 2567, 366, 526, 2183, 263,
19848, 6306, 29889, 3529, 8500, 278, 1950, 337, 351, 1237,
310, 278, 19848, 29889, 450, 19848, 6306, 756, 278, 1494,
3402, 29901, 13, 28956, 13, 8423, 424, 29896, 29889, 8423,
424, 29906, 29889, 2023, 869, 8423, 424, 29940, 6778, 4704,
13, 28956, 13, 1576, 736, 995, 881, 367, 297, 3464,
310, 29871, 29900, 29899, 29896, 29889, 450, 6133, 278, 995,
29892, 278, 901, 5517, 278, 19848, 338, 304, 6403, 29889,
29871, 13, 4806, 3867, 278, 3829, 310, 278, 337, 7387,
29889, 13, 29966, 829, 14816, 29903, 6778, 13, 13, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, 869, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, 869, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, 869, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, 869, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, 869, -3996, -3996,
-3996, -3996, -3996, -3996, 5099, -3996, -3996, -3996, -3996, -3996,
-3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, -3996, 29871,
5293, 278, 22233, 19848, 2472, 29892, 825, 338, 278, 11959,
310, 278, 19848, 29915, 29879, 7709, 29973, 518, 29914, 25580,
29962, 259, 29900, 29889, 29900, 29941, 29953, 29947, 29871, 2]), 'labels': tensor([ 1, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, -100, -100, -100, -100, -100, -100, -100, -100, -100,
-100, 259, 29900, 29889, 29900, 29941, 29953, 29947, 29871, 2])}
Training Device(s)
name, pci.bus_id, vbios_version
A100-SXM4-40GB, 00000000:07:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:0F:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:47:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:4E:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:87:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:90:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:B7:00.0, 92.00.45.00.03
A100-SXM4-40GB, 00000000:BD:00.0, 92.00.45.00.03
Model
LlamaLMMForCausalLM.model =
LlamaLMMForCausalLM(
(model): LlamaLMMModel(
(embed_tokens): Embedding(32000, 4096, padding_idx=0)
(layers): ModuleList(
(0-31): 32 x LlamaDecoderLayer(
(self_attn): LlamaSdpaAttention(
(q_proj): Linear(in_features=4096, out_features=4096, bias=False)
(k_proj): Linear(in_features=4096, out_features=4096, bias=False)
(v_proj): Linear(in_features=4096, out_features=4096, bias=False)
(o_proj): Linear(in_features=4096, out_features=4096, bias=False)
(rotary_emb): LlamaRotaryEmbedding()
)
(mlp): LlamaMLP(
(gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
(up_proj): Linear(in_features=4096, out_features=11008, bias=False)
(down_proj): Linear(in_features=11008, out_features=4096, bias=False)
(act_fn): SiLU()
)
(input_layernorm): LlamaRMSNorm()
(post_attention_layernorm): LlamaRMSNorm()
)
)
(norm): LlamaRMSNorm()
(molecule_2d_lmm_projector): _MLPVectorProjector(
(mlp): Sequential(
(0): Linear(in_features=300, out_features=4096, bias=True)
(1): GELU(approximate='none')
(2): Linear(in_features=4096, out_features=4096, bias=True)
)
)
)
(lm_head): Linear(in_features=4096, out_features=32000, bias=False)
)