Error when accessing the hidden_size with Mixtral-8x7B-Instruct
import torch
from transformers import AutoConfig, AutoModel, AutoTokenizer

model_id = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
tokenizer = AutoTokenizer.from_pretrained(model_id)
config = AutoConfig.from_pretrained(model_id, hidden_size=4096, output_hidden_states=True)
model = AutoModel.from_pretrained(model_id, config=config,
                                  torch_dtype=torch.float16,
                                  device_map='auto',
                                  low_cpu_mem_usage=True,
                                  trust_remote_code=True,
                                  ignore_mismatched_sizes=True)
model.config.use_cache = False  # silence the warnings
model.config.pretraining_tp = 1
model.gradient_checkpointing_enable()

tokenizer.padding_side = 'right'
tokenizer.pad_token = tokenizer.eos_token
tokenizer.add_eos_token = True
tokenizer.add_bos_token, tokenizer.add_eos_token  # inspect the current special-token flags
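As a side note on the hidden_size mentioned in the title: the stock Mixtral config already reports hidden_size=4096 (it also shows up as Embedding(32000, 4096) in the model dump below), so it can be read straight off the loaded config without passing it as an override. A minimal sketch, assuming only the model_id above:

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained('mistralai/Mixtral-8x7B-Instruct-v0.1')
print(cfg.hidden_size)           # 4096 for this checkpoint
cfg.output_hidden_states = True  # flags can still be toggled on the loaded config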
def generate_embeddings(text, max_length=32000):
    # Check if the text is empty and return zeros
    if text.strip() == "":
        # The size of the embeddings can be obtained from the model's configuration
        embedding_dim = model.config.hidden_size
        # Return a tensor of zeros with the same shape as the embeddings for one sequence
        return torch.zeros(1, embedding_dim)
    else:
        with torch.no_grad():
            inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=max_length)
            inputs = torch.tensor([tokenizer.convert_tokens_to_ids(inputs)])
            inputs = inputs.to(device)
            outputs = model(**inputs)

        return outputs.last_hidden_state[:, 0, :].detach()
subdata = []
for i in whole_corpus3:
    subdata.append(generate_embeddings('\n'.join(i)))

embeddings = torch.stack(subdata)
Loading checkpoint shards: 100% | 19/19 [00:51<00:00, 2.58s/it]
TypeError Traceback (most recent call last)
Cell In[45], line 50
48 subdata = []
49 for i in whole_corpus3:
---> 50 subdata.append(generate_embeddings('\n'.join(i)))
52 embeddings = torch.stack(subdata)
Cell In[45], line 41, in generate_embeddings(text, max_length)
39 inputs = torch.tensor([tokenizer.convert_tokens_to_ids(inputs)])
40 inputs = inputs.to(device)
---> 41 outputs = model(**inputs)
43 return outputs.last_hidden_state[:,0,:].detach()
TypeError: MixtralModel(
(embed_tokens): Embedding(32000, 4096)
(layers): ModuleList(
(0-31): 32 x MixtralDecoderLayer(
(self_attn): MixtralAttention(
(q_proj): Linear(in_features=4096, out_features=4096, bias=False)
(k_proj): Linear(in_features=4096, out_features=1024, bias=False)
(v_proj): Linear(in_features=4096, out_features=1024, bias=False)
(o_proj): Linear(in_features=4096, out_features=4096, bias=False)
(rotary_emb): MixtralRotaryEmbedding()
)
(block_sparse_moe): MixtralSparseMoeBlock(
(gate): Linear(in_features=4096, out_features=8, bias=False)
(experts): ModuleList(
(0-7): 8 x MixtralBLockSparseTop2MLP(
(w1): Linear(in_features=4096, out_features=14336, bias=False)
(w2): Linear(in_features=14336, out_features=4096, bias=False)
(w3): Linear(in_features=4096, out_features=14336, bias=False)
(act_fn): SiLU()
)
)
)
(input_layernorm): MixtralRMSNorm()
(post_attention_layernorm): MixtralRMSNorm()
)
)
(norm): MixtralRMSNorm()
) argument after ** must be a mapping, not Tensor
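The last line of the TypeError is the relevant part: model(**inputs) needs a mapping (dict-like object) of tensors, but by the time it reaches the model, inputs has been replaced by a plain Tensor via torch.tensor([tokenizer.convert_tokens_to_ids(inputs)]). The tokenizer call with return_tensors="pt" already returns a dict-like BatchEncoding (input_ids, attention_mask), so that conversion step can be dropped. A minimal sketch of the intended pattern, assuming the model and tokenizer loaded above; the undefined device variable is replaced here with model.device, which should be fine with device_map='auto':

def generate_embeddings(text, max_length=32000):
    # Empty input: return a zero vector of the model's embedding width
    if text.strip() == "":
        return torch.zeros(1, model.config.hidden_size)
    with torch.no_grad():
        # BatchEncoding is already a mapping of input_ids / attention_mask tensors
        inputs = tokenizer(text, return_tensors="pt", padding=True,
                           truncation=True, max_length=max_length)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        outputs = model(**inputs)
    # First-token hidden state as the sequence embedding, as in the original code
    return outputs.last_hidden_state[:, 0, :].detach()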