# article_writer/humanize.py
# import torch
# from nltk import sent_tokenize
# import nltk
# from tqdm import tqdm
# from transformers import T5ForConditionalGeneration, T5Tokenizer
# nltk.download("punkt")
# # autodetect the available device
# GPU_IDX = 1 # which GPU to use
# if torch.cuda.is_available():
#     num_gpus = torch.cuda.device_count()
#     print(f"Number of available GPUs: {num_gpus}")
#     assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
#     device = torch.device(f"cuda:{GPU_IDX}")
#     print(f"Using GPU: {GPU_IDX}")
# else:
#     print("CUDA is not available. Using CPU instead.")
#     device = torch.device("cpu")
# # Configuration for models and their adapters
# model_config = {
# "Base Model": "polygraf-ai/poly-humanizer-base",
# "Large Model": "polygraf-ai/poly-humanizer-large",
# # "XL Model": {
# # "path": "google/flan-t5-xl",
# # "adapters": {
# # "XL Model Adapter": "polygraf-ai/poly-humanizer-XL-adapter",
# # "XL Law Model Adapter": "polygraf-ai/poly-humanizer-XL-law-adapter",
# # "XL Marketing Model Adapter": "polygraf-ai/marketing-cleaned-13K-grad-acum-4-full",
# # "XL Child Style Model Adapter": "polygraf-ai/poly-humanizer-XL-children-adapter-checkpoint-4000",
# # },
# # },
# }
# # cache the base models, tokenizers, and adapters
# models, tokenizers = {}, {}
# for name, config in model_config.items():
#     path = config if isinstance(config, str) else config["path"]
#     # initialize model and tokenizer
#     model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
#     models[name] = model
#     tokenizers[name] = T5Tokenizer.from_pretrained(path)
#     # load all available adapters, each adding roughly 150M parameters
#     if isinstance(config, dict) and "adapters" in config:
#         for adapter_name, adapter_path in config["adapters"].items():
#             model.load_adapter(adapter_path, adapter_name=adapter_name)
#             print(f"Loaded adapter: {adapter_name}, Num. params: {model.num_parameters()}")
# def paraphrase_text(
#     text,
#     model_name="Base Model",
#     temperature=1.2,
#     repetition_penalty=1.0,
#     top_k=50,
#     length_penalty=1.0,
# ):
#     # select the model, tokenizer and adapter
#     if "XL" in model_name:  # dynamic adapter load/unload for XL models
#         # all adapter models use the XL model as the base
#         tokenizer, model = tokenizers["XL Model"], models["XL Model"]
#         # set the adapter if it's not already set
#         if model.active_adapters() != [f"{model_name} Adapter"]:
#             model.set_adapter(f"{model_name} Adapter")
#             print(f"Using adapter: {model_name} Adapter")
#     else:
#         tokenizer = tokenizers[model_name]
#         model = models[model_name]
#     # paraphrase each chunk of text
#     sentences = sent_tokenize(text)  # sentence boundary detection
#     paraphrases = []
#     for sentence in tqdm(sentences):
#         sentence = sentence.strip()
#         if len(sentence) == 0:
#             continue
#         inputs = tokenizer("Please paraphrase this sentence: " + sentence, return_tensors="pt").to(device)
#         outputs = model.generate(
#             **inputs,
#             do_sample=True,
#             temperature=temperature,
#             repetition_penalty=repetition_penalty,
#             max_length=128,
#             top_k=top_k,
#             length_penalty=length_penalty,
#         )
#         paraphrased_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
#         paraphrases.append(paraphrased_sentence)
#         print(f"\nOriginal: {sentence}")
#         print(f"Paraphrased: {paraphrased_sentence}")
#     combined_paraphrase = " ".join(paraphrases)
#     return combined_paraphrase