from openai import OpenAI import gradio as gr import os import json import html import random import datetime api_key = os.environ.get('FEATHERLESS_API_KEY') client = OpenAI( base_url="https://api.featherless.ai/v1", api_key=api_key ) # from https://github.com/av/klmbr/blob/ca2967123d171fc6d91c329c40e5050a86088446/klmbr/main.py # I sure which I could import this, but can't figure out how to make HF spaces run this as a module # and not a file. import random mods = [ "capitalize", "diacritic", 'leetspeak', "remove_vowel", ] def klimbr_randomize(text, percentage): if not text: return "", {} # Return empty string and empty mapping if input is empty if not 0 <= percentage <= 100: raise ValueError("Percentage must be between 0 and 100") words = text.split() chars = list(text) num_chars_to_modify = max(1, int(len(chars) * (percentage / 100))) indices_to_modify = random.sample(range(len(chars)), num_chars_to_modify) word_mapping = {} for idx in indices_to_modify: modification = random.choice(mods) # Find the word that contains the current character current_length = 0 for word_idx, word in enumerate(words): if current_length <= idx < current_length + len(word): original_word = word word_start_idx = current_length break current_length += len(word) + 1 # +1 for the space else: # If we're here, we're likely dealing with a space or the last character continue if modification == "capitalize": chars[idx] = chars[idx].swapcase() elif modification == "diacritic": if chars[idx].isalpha(): diacritics = ["̀", "́", "̂", "̃", "̈", "̄", "̆", "̇", "̊", "̋"] chars[idx] = chars[idx] + random.choice(diacritics) elif modification == "leetspeak": leetspeak_map = { "a": "4", "e": "3", "i": "1", "o": "0", "s": "5", "t": "7", "b": "8", "g": "9", "l": "1", } chars[idx] = leetspeak_map.get(chars[idx].lower(), chars[idx]) elif modification == "remove_vowel": if chars[idx].lower() in "aeiou": chars[idx] = "" modified_word = "".join( chars[word_start_idx : word_start_idx + len(original_word)] ) if modified_word != original_word: # Clean up both the modified word and the original word cleaned_modified_word = modified_word.rstrip('.,') cleaned_original_word = original_word.rstrip('.,') word_mapping[cleaned_modified_word] = cleaned_original_word modified_text = "".join(chars) return modified_text, word_mapping ## end of klimbr inclusion klimbr_cache = {} def memoized_klimbr(message, percentage, extra): key = (message, percentage, extra) if key not in klimbr_cache: klimbr_cache[key] = klimbr_randomize(message, percentage)[0] return klimbr_cache[key] def klimberize_conversation(message, history, percentage): # we memoize the klimbrization of strings. # this is to work with the gradio chat interface model # so that messages are not _re_-randomized at each conversation turn klimbred_history = [ (memoized_klimbr(human, percentage, index), assistant) for index, (human, assistant) in enumerate(history) ] klimbred_message = memoized_klimbr(message, percentage, len(history)) return (klimbred_message, klimbred_history) def respond(message, history, model, klimbr_percentage): history_openai_format = [] message, history = klimberize_conversation(message, history, klimbr_percentage) for human, assistant in history: history_openai_format.append({"role": "user", "content": human }) history_openai_format.append({"role": "assistant", "content":assistant}) history_openai_format.append({"role": "user", "content": message}) response = client.chat.completions.create( model=model, messages= history_openai_format, temperature=1.0, stream=True, max_tokens=2000, extra_headers={ 'HTTP-Referer': 'https://huggingface.co/spaces/featherless-ai/klimbr-demo', 'X-Title': "Klimbr demo space" } ) partial_message = "" for chunk in response: if chunk.choices[0].delta.content is not None: content = chunk.choices[0].delta.content escaped_content = html.escape(content) partial_message += escaped_content yield partial_message logo = open('./logo.svg').read() # we chose a few models across the smaller model classes to give a sense of the technique MODEL_CHOICES = { "llama2-13b-4k": [ "NousResearch/Nous-Hermes-Llama2-13b", ], "llama3-8b-8k": [ "meta-llama/Meta-Llama-3-8B-Instruct", "NousResearch/Hermes-2-Theta-Llama-3-8B", "aaditya/Llama3-OpenBioLLM-8B", "elyza/Llama-3-ELYZA-JP-8B", "mlabonne/NeuralDaredevil-8B-abliterated", ], "llama31-8b-16k": [ "meta-llama/Meta-Llama-3.1-8B-Instruct", "NousResearch/Hermes-3-Llama-3.1-8B", "shenzhi-wang/Llama3.1-8B-Chinese-Chat", "AXCXEPT/Llama-3.1-8B-EZO-1.1-it", "mlabonne/Meta-Llama-3.1-8B-Instruct-abliterated", "VAGOsolutions/Llama-3.1-SauerkrautLM-8b-Instruct", ], "mistral-v02-7b-lc": [ "HuggingFaceH4/zephyr-7b-beta", "mlabonne/NeuralDaredevil-7B", "HuggingFaceH4/zephyr-7b-alpha", ], "mistral-nemo-12b-lc": [ "mistralai/Mistral-Nemo-Instruct-2407", ], "rwvk-14b-lc": [ "m8than/apple-rwkv-1-c-14b", ], } def build_model_choices(): all_choices = [] for model_class_name in MODEL_CHOICES: model_class = MODEL_CHOICES[model_class_name] all_choices += [ (f"{model_id} ({model_class_name})", model_id) for model_id in model_class ] return all_choices model_choices = build_model_choices() def initial_model(referer=None): return "mistralai/Mistral-Nemo-Instruct-2407" # let's use a random but different model each day. # key=os.environ.get('RANDOM_SEED', 'kcOtfNHA+e') # o = random.Random(f"{key}-{datetime.date.today().strftime('%Y-%m-%d')}") # return o.choice(model_choices)[1] title_text="Klimbr token input pre-processor demo space" klimbr_url="https://github.com/av/klmbr" css = """ .logo-mark { fill: #ffe184; } /* from https://github.com/gradio-app/gradio/issues/4001 * necessary as putting ChatInterface in gr.Blocks changes behaviour */ .contain { display: flex; flex-direction: column; } .gradio-container { height: 100vh !important; } #component-0 { height: 100%; } #chatbot { flex-grow: 1; overflow: auto;} .lead-text { display: flex; flex-direction: column; align-items: center; justify-content: center; padding: 20px; box-sizing: border-box; } .content { max-width: 60vh; text-align: center; font-size: 15pt; } .h1 { margin-bottom: 20px; } """ with gr.Blocks(title_text, css=css) as demo: gr.HTML(f"""

Klimbr demo space

Klimbr is a technique to increase entropy in LLM outputs by adding entropy to the input prompt prior to inference.

For details on the technique see the klimbr github or the source code of this space.

""") # hidden_state = gr.State(value=initial_model) percentage = gr.Slider( minimum=0, maximum=1, value=0.15, label="Percentage of input text to randomize" ) with gr.Row(): model_selector = gr.Dropdown( label="Select your Model", choices=model_choices, value=initial_model, # value=hidden_state, scale=4 ) gr.Button( value="Visit Model Card ↗️", scale=1 ).click( inputs=[model_selector], js="(model_selection) => { window.open(`https://huggingface.co/${model_selection}`, '_blank') }", fn=None, ) gr.ChatInterface( respond, additional_inputs=[model_selector, percentage], head=""", """, concurrency_limit=5 ) gr.HTML(f"""

Inference by {logo}

""") def update_initial_model_choice(request: gr.Request): return initial_model(request.headers.get('referer')) demo.load(update_initial_model_choice, outputs=model_selector) demo.launch()