#!/usr/bin/env bash
#
# Runs the evaluation script against three hosted chat models
# (gpt-3.5-turbo, deepseek-chat, qwen-coder-plus). Each model is evaluated
# twice on $DATASET_ROOT/test.jsonl: once with INCLUDE_MSG=no and once with
# INCLUDE_MSG=yes. For every pass the result file and a combined
# stdout/stderr log are written to:
#   $RESULT_ROOT/<model>/nomsg.jsonl  $RESULT_ROOT/<model>/nomsg.log
#   $RESULT_ROOT/<model>/msg.jsonl    $RESULT_ROOT/<model>/msg.log
#
# The API keys are intentionally blank here; fill in the third argument of
# each evaluate_model call in main() before running.

# -u catches misspelled variable names; pipefail lets the evaluator's exit
# status survive the `| tee`. No -e on purpose: one failing model must not
# prevent the remaining models from being evaluated.
set -uo pipefail

# Exported so the Python evaluator (and anything it spawns) can see them.
export HF_HUB_OFFLINE=1
export DATASET_ROOT="./evaluate/dataset/C_C++_Java_Python"
export RESULT_ROOT="./evaluate/result"
export EVAL_SCRIPT="./dataset_eval.py"

#######################################
# Run a single evaluation pass for the currently selected model.
# Globals:
#   DATASET_ROOT, RESULT_ROOT, EVAL_SCRIPT, MODEL, URL, KEY (read)
#   INCLUDE_MSG (exported)
# Arguments:
#   $1 - value to export as INCLUDE_MSG ("yes" or "no")
#   $2 - base name of the output files (e.g. "msg" -> msg.jsonl, msg.log)
# Outputs:
#   Evaluator stdout+stderr, echoed to stdout and saved to the .log file.
# Returns:
#   Non-zero if the evaluator or tee fails.
#######################################
run_pass() {
  local include_msg=$1
  local stem=$2
  local out_dir="$RESULT_ROOT/$MODEL"

  # Not passed on the command line; presumably read from the environment by
  # the evaluator - confirm against dataset_eval.py.
  export INCLUDE_MSG="$include_msg"

  python "$EVAL_SCRIPT" \
    -d "$DATASET_ROOT/test.jsonl" \
    -t online \
    -m "$MODEL" \
    -u "$URL" \
    -k "$KEY" \
    -o "$out_dir/$stem.jsonl" 2>&1 \
    | tee "$out_dir/$stem.log"
}

#######################################
# Evaluate one model without and then with INCLUDE_MSG enabled.
# Globals:
#   RESULT_ROOT (read); MODEL, URL, KEY (exported)
# Arguments:
#   $1 - model name passed to the evaluator and used as the result sub-directory
#   $2 - base URL of the API endpoint
#   $3 - API key (may be empty; a warning is printed in that case)
# Returns:
#   0 if both passes succeed, 1 otherwise.
#######################################
evaluate_model() {
  export MODEL="$1" URL="$2" KEY="$3"
  local status=0

  if [[ -z "$KEY" ]]; then
    printf 'warning: no API key set for %s\n' "$MODEL" >&2
  fi

  mkdir -p -- "$RESULT_ROOT/$MODEL" || return 1

  # Keep going after a failed pass so the second configuration still runs.
  run_pass no nomsg || status=1
  run_pass yes msg || status=1
  return "$status"
}

#######################################
# Evaluate every configured model; report failure if any pass failed.
#######################################
main() {
  local failed=0

  # GPT-3.5-turbo
  evaluate_model "gpt-3.5-turbo" "https://api.chatanywhere.tech/v1/" "" \
    || failed=1

  # DeepSeek-V3
  evaluate_model "deepseek-chat" "https://api.deepseek.com/v1" "" \
    || failed=1

  # qwen-coder-plus
  evaluate_model "qwen-coder-plus" \
    "https://dashscope.aliyuncs.com/compatible-mode/v1" "" \
    || failed=1

  return "$failed"
}

main "$@"