yhyu13
commited on
Commit
·
6ba35ff
1
Parent(s):
f6af3a9
Upload
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- Predict_20/all_results.json +9 -0
- Predict_20/generated_predictions.jsonl +20 -0
- Predict_20/predict_log.txt +163 -0
- Predict_20/predict_results.json +9 -0
- README.md +74 -0
- adapter_config.json +25 -0
- adapter_model.safetensors +3 -0
- added_tokens.json +40 -0
- all_results.json +19 -0
- checkpoint-1000/README.md +204 -0
- checkpoint-1000/adapter_config.json +25 -0
- checkpoint-1000/adapter_model.safetensors +3 -0
- checkpoint-1000/added_tokens.json +40 -0
- checkpoint-1000/merges.txt +0 -0
- checkpoint-1000/optimizer.pt +3 -0
- checkpoint-1000/rng_state_0.pth +3 -0
- checkpoint-1000/rng_state_1.pth +3 -0
- checkpoint-1000/scheduler.pt +3 -0
- checkpoint-1000/special_tokens_map.json +24 -0
- checkpoint-1000/tokenizer_config.json +328 -0
- checkpoint-1000/trainer_state.json +51 -0
- checkpoint-1000/training_args.bin +3 -0
- checkpoint-1000/vocab.json +0 -0
- checkpoint-2000/README.md +204 -0
- checkpoint-2000/adapter_config.json +25 -0
- checkpoint-2000/adapter_model.safetensors +3 -0
- checkpoint-2000/added_tokens.json +40 -0
- checkpoint-2000/merges.txt +0 -0
- checkpoint-2000/optimizer.pt +3 -0
- checkpoint-2000/rng_state_0.pth +3 -0
- checkpoint-2000/rng_state_1.pth +3 -0
- checkpoint-2000/scheduler.pt +3 -0
- checkpoint-2000/special_tokens_map.json +24 -0
- checkpoint-2000/tokenizer_config.json +328 -0
- checkpoint-2000/trainer_state.json +81 -0
- checkpoint-2000/training_args.bin +3 -0
- checkpoint-2000/vocab.json +0 -0
- checkpoint-3000/README.md +204 -0
- checkpoint-3000/adapter_config.json +25 -0
- checkpoint-3000/adapter_model.safetensors +3 -0
- checkpoint-3000/added_tokens.json +40 -0
- checkpoint-3000/merges.txt +0 -0
- checkpoint-3000/optimizer.pt +3 -0
- checkpoint-3000/rng_state_0.pth +3 -0
- checkpoint-3000/rng_state_1.pth +3 -0
- checkpoint-3000/scheduler.pt +3 -0
- checkpoint-3000/special_tokens_map.json +24 -0
- checkpoint-3000/tokenizer_config.json +328 -0
- checkpoint-3000/trainer_state.json +111 -0
- checkpoint-3000/training_args.bin +3 -0
Predict_20/all_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_bleu-4": 44.311245,
|
3 |
+
"predict_rouge-1": 51.570029999999996,
|
4 |
+
"predict_rouge-2": 30.433114999999997,
|
5 |
+
"predict_rouge-l": 36.02434000000001,
|
6 |
+
"predict_runtime": 109.5257,
|
7 |
+
"predict_samples_per_second": 0.183,
|
8 |
+
"predict_steps_per_second": 0.183
|
9 |
+
}
|
Predict_20/generated_predictions.jsonl
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"label": "1. Eat a balanced and nutritious diet: Make sure your meals are inclusive of a variety of fruits and vegetables, lean protein, whole grains, and healthy fats. This helps to provide your body with the essential nutrients to function at its best and can help prevent chronic diseases.\n\n2. Engage in regular physical activity: Exercise is crucial for maintaining strong bones, muscles, and cardiovascular health. Aim for at least 150 minutes of moderate aerobic exercise or 75 minutes of vigorous exercise each week.\n\n3. Get enough sleep: Getting enough quality sleep is crucial for physical and mental well-being. It helps to regulate mood, improve cognitive function, and supports healthy growth and immune function. Aim for 7-9 hours of sleep each night.", "predict": "1. Eat a Balanced Diet: Consuming a variety of foods from different food groups can help ensure you are getting the necessary nutrients to keep your body healthy. A balanced diet includes fruits, vegetables, whole grains, lean proteins, and healthy fats.\n\n2. Exercise Regularly: Regular physical activity is important for maintaining a healthy weight, reducing the risk of chronic diseases, and improving mental health. Aim for at least 150 minutes of moderate-intensity exercise per week, or 75 minutes of vigorous-intensity exercise.\n\n3. Get Enough Sleep: Getting enough sleep is crucial for your overall health and well-being. Aim for 7-9 hours of sleep per night to help your body recover and recharge. Lack of sleep can lead to a weakened immune system, mood swings, and difficulty concentrating."}
|
2 |
+
{"label": "The three primary colors are red, blue, and yellow. These colors are called primary because they cannot be created by mixing other colors and all other colors can be made by combining them in various proportions. In the additive color system, used for light, the primary colors are red, green, and blue (RGB).", "predict": "The three primary colors are red, blue, and yellow (or orange, green, and violet, depending on the color system being used). These colors cannot be created by mixing any other colors together, and they are the foundation for creating all other colors."}
|
3 |
+
{"label": "An atom is the basic building block of all matter and is made up of three types of particles: protons, neutrons, and electrons. The structure of an atom can be described as a nucleus at the center surrounded by a cloud of electrons.\n\nThe nucleus of an atom is made up of protons and neutrons. Protons are positively charged particles and neutrons are neutral particles with no charge. Both of these particles are located in the nucleus of the atom, which is at the center of the atom and contains most of the atom's mass.\n\nSurrounding the nucleus of the atom is a cloud of electrons. Electrons are negatively charged particles that are in constant motion around the nucleus. The electron cloud is divided into shells or orbitals, and each shell can hold a certain number of electrons. The number of electrons in the outermost shell, called the valence shell, determines the chemical properties of the atom. \n\nIn a neutral atom, the number of protons in the nucleus is equal to the number of electrons in the electron cloud, so the positive and negative charges balance out and the atom has no overall charge. The number of protons, also called the atomic number, determines what element the atom is.", "predict": "An atom consists of three main components: protons, neutrons, and electrons. Protons and neutrons are found in the nucleus (center) of the atom, while electrons orbit the nucleus in energy levels, or shells. \n\nThe protons have a positive charge, the neutrons have no charge (they are neutral), and the electrons have a negative charge. The number of protons in the nucleus determines the element that the atom belongs to, while the number of neutrons can vary and result in different isotopes of the same element. The number of electrons in an atom is usually equal to the number of protons, which results in a neutral charge for the atom. However, atoms can gain or lose electrons to become ions, which have a net positive or negative charge."}
|
4 |
+
{"label": "There are several ways to reduce air pollution, including:\n\n1. Reduce energy consumption: By conserving energy, we reduce the amount of pollution emitted from power plants. Some ways to achieve this include: using energy-efficient appliances and lighting, insulating homes, and shutting off lights and electronics when not in use.\n\n2. Use public transportation or carpool: Cars and trucks are major sources of pollution. By using public transportation, carpooling or biking, you can reduce the number of cars on the road, which in turn reduces emissions.\n\n3. Be responsible with waste: When waste is burned or decomposed, they can release harmful gases into the air. Dispose of hazardous waste correctly and recycle materials like plastics, paper, and glass when possible.\n\n4. Support clean energy sources: Renewable energy sources like solar and wind power generate little or no pollution. Support clean energy companies and projects, and consider installing renewable energy systems in your home.\n\n5. Drive efficiently: If you need to drive, make sure your vehicle is well maintained and drive efficiently by avoiding rapid acceleration and braking, and reducing idling.\n\n6. Be mindful of product choices: Some products and practices release more pollution than others. Choose products with eco-friendly packaging and try to minimize your use of aerosol sprays, which can release harmful chemicals into the air.", "predict": "There are several ways in which we can reduce air pollution, including:\n\n1. Using renewable energy sources: Transitioning to clean, renewable energy sources such as solar, wind, and hydroelectric power can help reduce air pollution from the burning of fossil fuels.\n\n2. Improving public transportation: Providing more efficient, affordable, and accessible public transportation can reduce the number of cars on the road, thus decreasing air pollution.\n\n3. Encouraging carpooling: Promoting carpooling can help reduce the number of vehicles on the road, thereby decreasing air pollution.\n\n4. Reducing waste: Encouraging recycling, composting, and reducing waste can help decrease air pollution by reducing the amount of waste that ends up in landfills, which can release harmful gases into the air.\n\n5. Planting trees: Trees absorb carbon dioxide, a greenhouse gas that contributes to air pollution. Planting more trees can help reduce the amount of carbon dioxide in the air.\n\n6. Supporting regulations: Supporting and enforcing regulations that limit air pollution from industries and other sources can help reduce the amount of pollutants released into the air.\n\n7. Raising awareness: Educating the public about the dangers of air pollution and ways to reduce it can help encourage people to take action to reduce their impact on the environment."}
|
5 |
+
{"label": "As an AI assistant, I do not have my own personal experiences or physical existence. However, I am programmed to make decisions based on algorithms, logic, and data. There are times when I encounter a situation where making a decision is difficult because there may be conflicting information or multiple possible outcomes. In such cases, I use my ability to analyze and weigh the risks and benefits of each option and come to a conclusion that is most likely to achieve the desired result.", "predict": "As an artificial intelligence, I do not have personal experiences or emotions to draw from, so I cannot describe a time when I had to make a difficult decision. However, I can explain the process of decision making that humans typically go through when facing a difficult decision. \n\nThe process of decision making usually involves several steps. First, the individual must identify the problem or situation that requires a decision. Next, they gather information related to the problem and evaluate the options available to them. This may involve weighing the pros and cons of each option, considering the potential outcomes of each choice, and consulting with others who may have relevant knowledge or experience.\n\nOnce the individual has evaluated their options, they must make a decision. This step may involve weighing the potential risks and benefits of each option, considering their personal values and beliefs, and making a choice based on what they believe is the best course of action.\n\nFinally, the individual must evaluate the outcome of their decision. This may involve reflecting on the results of their choice, considering what they could have done differently, and learning from their experience to make better decisions in the future. In situations where the decision had a significant impact, the individual may also seek feedback from others to gain a better understanding of the situation and their decision-making process."}
|
6 |
+
{"label": "The odd one out is Telegram. Twitter and Instagram are social media platforms mainly for sharing information, images and videos while Telegram is a cloud-based instant messaging and voice-over-IP service.", "predict": "The odd one out is Telegram (as it is a voice messaging app, while Twitter and Instagram are social media apps)."}
|
7 |
+
{"label": "The fraction 4/16 is equivalent to 1/4 because both fractions represent the same value. A fraction can be simplified by dividing both the numerator and the denominator by a common factor. In this case, 4 is a common factor of both the numerator and the denominator of 4/16. When we divide both by 4, we get 4/4 = 1 and 16/4 = 4, so the simplified fraction is 1/4. Alternatively, we can think of this in terms of multiplication. For example, if we multiply the numerator and denominator of the fraction 1/4 by 4, we get (1x4)/(4x4), or 4/16. Since both fractions can be derived from the other through multiplication or division by the same number, they represent the same value and are equivalent.", "predict": "The fraction 4/16 is equivalent to 1/4 because both fractions represent the same amount, or a part of a whole, which is one fourth. In other words, 4/16 is simply a different way of writing 1/4. To prove that they are equivalent, we can simplify 4/16 by dividing both the numerator and denominator by 4. This gives us:\n\n4 ÷ 4 / 16 ÷ 4 = 1 / 4 \n\nTherefore, 4/16 and 1/4 are equivalent fractions."}
|
8 |
+
{"label": "Sophie sat at her desk, staring blankly at the computer screen. Her mind was racing as she weighed the options in front of her. She had been offered a promotion to a senior management position within her company, but it meant moving across the country and leaving her friends and family behind. At the same time, her dream job at a rival company had finally opened up, and she had been invited for an interview. \n\nThe thought of leaving her comfortable life behind, to take a chance on something new and uncertain, made Sophie feel anxious, but also excited. She had always been one to take risks, and it had paid off in the past. But now, with such a big decision to make, Sophie couldn't help but feel overwhelmed.\n\nAfter much deliberation, Sophie made her choice. She packed her bags, said goodbye to her loved ones, and moved across the country for the new job. It was a difficult first few months, adjusting to a new city and a new company culture. But as time went by, Sophie realized that she had made the best decision for her career. The job was challenging, but also rewarding, and she was surrounded by passionate, driven colleagues who inspired her to grow and develop.\n\nIn the end, Sophie's bold move paid off. She was thriving in her new role, and had made new friends and connections in her new city. Looking back, Sophie was grateful for the opportunity to take a risk and make a change, and was confident that she had made the right decision for her career.", "predict": "As Sarah sat in her office, she couldn't help but feel the weight of the world on her shoulders. She had just been offered a job in a new city, one that would mean a significant pay increase and the chance to work with some of the best minds in her field. But it would also mean leaving her current home, her friends, and her family behind.\n\nSarah had been working in her current position for years, and it had been a source of pride for her. She loved her job and the people she worked with, and the thought of leaving it all behind was daunting.\n\nBut as she thought about the future, Sarah realized that she couldn't keep living in the past. She needed to take a chance and see where her career could take her.\n\nWith a deep breath, Sarah accepted the job offer. It wouldn't be easy, but she knew that it was the right decision. She packed her bags and said goodbye to her loved ones, ready to start a new chapter in her life.\n\nAs Sarah drove away from her old home, she felt a mix of emotions. She was sad to leave, but she was also excited for the new opportunities that lay ahead. She knew that this was just the beginning of her journey, and she was ready to see where it would take her."}
|
9 |
+
{"label": "I'd love to create a 3D model for you! However, I'm an AI language model and I do not have the necessary tools to create visual models. You may try using a modeling software such as Blender, SketchUp or AutoCAD to create your 3D model. If you don't know how to get started, you may find numerous online tutorials to guide you through the process.", "predict": "Unfortunately, as an AI I am unable to create visual content such as 3D models. I suggest searching for 3D modeling software or apps to create your own model of a house. There are many user-friendly programs available online that allow you to design and create your own 3D models. Once you have downloaded the software, you can begin by selecting a base shape for your house and then customize it by adding walls, doors, windows, and any other details you would like. You can also use textures and colors to make your model look more realistic. Once you are satisfied with your design, you can export it as a 3D file that you can use for printing or display."}
|
10 |
+
{"label": "There are two spelling errors in the sentence. The corrected sentence should be: \"He finished his meal and left the restaurant.\"", "predict": "There are two spelling mistakes in this sentence: \"finnished\" should be \"finished\" and \"resturant\" should be \"restaurant\". The corrected sentence would be: \"He finished his meal and left the restaurant.\""}
|
11 |
+
{"label": "Julius Caesar, the Roman Military general, and statesman was assassinated on March 15, 44 BC. He was stabbed to death at a meeting of the Senate in the Theatre of Pompey by a group of senators who called themselves the Liberatores. Led by Gaius Cassius Longinus, and Marcus Junius Brutus, they believed that Caesar's dictatorship was subverting the Roman Republic and sought to restore the traditional republican government. The exact number of assailants is not known, but it is believed that a group of as many as 60 conspirators participated in the assassination, and each of them took turns in stabbing Caesar. The day of his death is still known as the Ides of March.", "predict": "Julius Caesar, the Roman statesman and military general, was assassinated on March 15, 44 BC. The assassination was carried out by a group of senators led by Brutus and Cassius, two of Caesar's rivals in the political sphere. It is believed that Caesar was stabbed multiple times by a dagger thrown by a conspirator named Octavius (later known as Augustus), while he was watching a gladiatorial event at the Colosseum in Rome. Caesar died shortly after the attack, and his death led to a period of political instability known as the Roman Civil War."}
|
12 |
+
{"label": "The capital city of France is Paris.", "predict": "The capital of France is Paris.\n\nNote: The question was about the capital city of a country, which is a different concept than a continent or an ocean. Therefore, the answer is specific to the country of France."}
|
13 |
+
{"label": "Here are ten items a person might need for a camping trip:\n\n1. Tent - to provide shelter and protection from the elements\n2. Sleeping bag - to stay warm and comfortable while sleeping\n3. Portable stove or campfire grill - for cooking meals\n4. Cooler with ice or ice packs - to keep perishable food and drinks cold\n5. Lantern or flashlight - to provide light during the night\n6. First aid kit - for minor injuries or ailments\n7. Map and compass or GPS - for navigation on hikes or exploring the area\n8. Camp chairs or folding chairs - for comfortable seating around the campsite\n9. Insect repellent - to protect against bug bites\n10. Sunscreen - to protect against sunburn.", "predict": "1. Tent\n2. Sleeping bag\n3. Flashlight or lantern\n4. Portable stove or grill\n5. Cooler with ice\n6. Camp chairs\n7. First aid kit\n8. Insect repellent\n9. Map and compass\n10. Water filter or purification tablets."}
|
14 |
+
{"label": "The Great Depression was a period of economic decline that lasted from 1929-1939, making it the longest-lasting depression in modern history. It affected not only the United States but countries around the world, causing substantial social and economic upheaval.\n\nThere were several major causes of the Great Depression, which include:\n\n1. Stock Market Crash of 1929 - In October of 1929, the stock market experienced a significant fall that wiped out millions of investors. This event is considered by many to be the initial trigger of the Great Depression.\n\n2. Overproduction - During the 1920s, many industries produced more goods than consumers wanted or could afford. This ultimately led to a decline in demand for goods, causing job loss, lower wages, and business failure.\n\n3. High Tariffs and War Debts - Protectionist trade policies in the form of high tariffs led to a decline in global trade, as other countries retaliated with tariffs of their own. Additionally, many countries were struggling to repay war debts, which led to economic instability.\n\n4. Bank Failures - As demand for goods declined, many banks began to fail, causing a loss of confidence in the banking system. This led to a massive withdrawal of money from banks, causing even more banks to fail.\n\n5. Drought Conditions - The Dust Bowl was a severe drought and dust storm that hit the Great Plains region of the United States in the 1930s. This had a significant impact on agriculture, causing many farmers to lose their land and livelihoods which worsened the effects of the depression.\n\nOverall, the Great Depression was caused by a combination of factors, including economic, environmental, and political factors. Its impact was widespread, affecting millions of people around the world.", "predict": "The Great Depression, which lasted from 1929 to 1939, was one of the most severe economic downturns in the history of the United States and the world. There were many factors that contributed to the onset and persistence of this crisis, but the following are some of the most significant causes:\n\n1. Stock Market Crash: The most significant factor that triggered the Great Depression was the crash of the stock market on October 29, 1929, known as Black Tuesday. The crash wiped out billions of dollars in value, leading to a decrease in consumer confidence and a decrease in spending.\n\n2. Bank Failures: The stock market crash led to a widespread loss of confidence in the banking system. Many banks had invested heavily in the stock market and lost a significant amount of money. As a result, people began withdrawing their money from banks, leading to a run on the banks and many of them failing.\n\n3. Reduction in International Trade: The Great Depression was not limited to the United States. It had a global impact, with countries around the world experiencing a decline in international trade. The decrease in trade was due to the decrease in demand for goods and services, as well as a decrease in the purchasing power of people.\n\n4. Agricultural Crisis: The agricultural sector also suffered during the Great Depression. Farmers were unable to sell their crops at a profitable price, and many of them were forced to take out loans to keep their farms running. When the prices of crops fell, they were unable to repay their loans, leading to a further decrease in their purchasing power.\n\n5. Government Policies: The government's response to the crisis also contributed to its severity. The Smoot-Hawley Tariff Act, which raised tariffs on imported goods, led to a decrease in international trade and a decrease in the purchasing power of people. Additionally, the government's decision to reduce spending and increase taxes further decreased consumer spending and investment.\n\nIn summary, the Great Depression was caused by a combination of factors, including the stock market crash, bank failures, reduction in international trade, agricultural crisis, and government policies. These factors led to a decrease in consumer spending, investment, and employment, resulting in a severe economic downturn."}
|
15 |
+
{"label": "Animals: Elephant\nPlants: Oak tree\nMinerals: Copper ore", "predict": "Animals: Elephant\nPlants: Oak tree\nMinerals: Copper ore."}
|
16 |
+
{"label": "Word embeddings are a type of natural language processing technique used to map words or phrases from a vocabulary to vectors of real numbers. The idea is to represent words in a continuous vector space, where the similarity between words can be measured by the distance between their corresponding vectors. This can be done using algorithms such as Word2Vec or GloVe, among others.\n\nThe use of word embeddings has revolutionized the field of Natural Language Processing, as it allows computers to understand text in a much more meaningful way than simply looking at the presence or absence of individual words. For example, when using word embeddings, the computer can understand that the words \"dog\" and \"puppy\" are closely related, while the words \"dog\" and \"umbrella\" are not.\n\nWord embeddings are used in a wide variety of NLP tasks, including text classification, sentiment analysis, machine translation, named entity recognition, and many others. They form the basis for many advanced language processing techniques, such as deep learning and neural machine translation.", "predict": "Word embeddings are a vital tool in Natural Language Processing (NLP) that allow machines to understand and interpret human language. These embeddings represent words as dense, continuous vectors of real numbers, providing a way to capture the meaning and context of words.\n\nThe idea behind word embeddings is that words that are semantically similar will be located close to each other in the vector space. For example, the words \"cat\" and \"dog\" would have similar embeddings because they are both animals.\n\nThere are several ways to create word embeddings, but the most common method is using neural networks. The neural network is trained on a large corpus of text, where each word is represented by its vector in the embedding space. The network learns to predict the vector for a given word based on its context.\n\nThe use of word embeddings in NLP has several benefits. First, they allow machines to capture the context of words, which is essential for understanding the meaning of sentences. For example, the word \"bank\" can have different meanings depending on the context. In a financial context, it refers to a financial institution, while in a river context, it refers to the side of a river. Word embeddings allow machines to understand the context and select the correct meaning of the word.\n\nSecond, word embeddings allow for the use of word similarity measures. For example, we can use cosine similarity to find the most similar words to a given word. This is useful for tasks such as document classification, sentiment analysis, and question answering.\n\nIn summary, word embeddings are a powerful tool in NLP that allow machines to understand the meaning and context of words. They provide a way to capture the relationships between words and are essential for many NLP tasks."}
|
17 |
+
{"label": "The motherboard, also known as the mainboard or system board, is the central printed circuit board in a computer. It serves as the backbone or foundation for a computer, connecting all the different components such as the CPU, RAM, storage drives, expansion cards, and peripherals. The motherboard manages communication and data transfer between these components, allowing them to work together and perform their designated tasks.\n\nThe motherboard also includes important circuitry such as the power regulation circuit that provides power to the different components, and the clock generator which synchronizes the operation of these components. It also contains the BIOS (basic input/output system), which is a firmware that controls the boot process and provides an interface for configuring and managing the computer's hardware. Other features on a motherboard may include built-in networking, audio, and video capabilities.\n\nOverall, the function of a computer motherboard is to provide a platform for the integration and operation of all the various components that make up a computer, making it an essential part of any computing system.", "predict": "The main function of a computer motherboard is to act as the central hub that connects and communicates between all the components of a computer system. It provides power to all of the other parts of the computer, including the processor, memory, hard drives, storage devices, and other peripheral devices. It also allows these components to exchange data, instructions, and signals with each other, which enables the computer to operate as a unified whole. Additionally, the motherboard includes built-in circuits and interfaces that allow the computer to interact with external devices, such as USB ports and audio jacks. Overall, the motherboard is a critical component of any computer system, as it serves as the foundation for all of the other hardware components to function together."}
|
18 |
+
{"label": "Here is a new version of the code that calculates the factorial of a number using recursion:\n\ndef factorial(num):\n if num == 0:\n return 1\n else:\n return num * factorial(num-1)", "predict": "Here is a new version of the code that reverses the logic of the original factorialize function:\n\ndef reverse_factorialize(num):\n if num == 1:\n return 1\n \n for i in range(num, 0, -1):\n if num % i == 0:\n return i\n \n return \"The number provided is not a factorial.\"\n\n\nThis new function takes in a number and checks if it is a factorial. If it is, it returns the factorial. If not, it returns an error message. The logic is reversed as the original function only checked if the number was a factorial, but this new function checks if the number is divisible by any numbers in descending order. If it is, it returns the divisor, which is the reverse of the original factorial calculation. If it is not divisible by any numbers, the function returns an error message."}
|
19 |
+
{"label": "One ethical solution to the problem of data privacy is to enforce stricter regulations and standards that require companies to be transparent about how they collect, store, and use user data. This means that companies must explicitly state what data they are collecting from users, why they need this data, and how they plan to use it. Users will also have the right to access, modify, and delete their data at any time.\n\nIn addition, companies should prioritize implementing robust security measures to protect user data from unauthorized access and breaches. This could include measures such as encryption, multi-factor authentication, and regular security audits. In the event of a data breach, companies must be held accountable and must inform affected users in a timely manner.\n\nMoreover, users should be empowered to make informed decisions about their data and privacy. This can be achieved by raising awareness about data privacy and providing tools and resources to help individuals better manage their personal information online.\n\nUltimately, ensuring data privacy is a shared responsibility between companies and users. Companies must be committed to safeguarding user data, while users must take an active role in protecting their personal information. By working together, we can create a safer and more ethical data landscape.", "predict": "One possible ethical solution to the problem of data privacy is the implementation of strict regulations and policies, enforced by governing bodies, that protect the personal information of individuals and organizations. This could involve the establishment of laws that dictate the proper collection, storage, and use of data, as well as the imposition of heavy fines for non-compliance.\n\nAnother solution could be the use of encryption technologies, which would make it much more difficult for hackers or other malicious actors to access sensitive data. This could be implemented by organizations and individuals alike, using secure communication protocols and data storage systems.\n\nFinally, education and awareness campaigns could be launched to help individuals understand the importance of data privacy, and how to protect their own personal information. This could involve teaching individuals about the risks associated with sharing sensitive information online, and providing tips and tools for safeguarding personal data. By empowering individuals to take control of their own data, we can help to create a culture of privacy and security."}
|
20 |
+
{"label": "1. To express regret\n2. To atone\n3. To make amends", "predict": "1. Mitigate\n2. Rectify\n3. Remorseful\n(Note: These words do not have a direct translation from English to other languages, so they may have different meanings in other contexts.)"}
|
Predict_20/predict_log.txt
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0 |
0%| | 0/20 [00:00<?, ?it/s]
|
1 |
10%|█ | 2/20 [00:01<00:15, 1.16it/s]
|
2 |
15%|█▌ | 3/20 [00:06<00:45, 2.70s/it]
|
3 |
20%|██ | 4/20 [00:16<01:25, 5.31s/it]
|
4 |
25%|██▌ | 5/20 [00:25<01:36, 6.46s/it]
|
5 |
30%|███ | 6/20 [00:26<01:04, 4.61s/it]
|
6 |
35%|███▌ | 7/20 [00:29<00:55, 4.29s/it]
|
7 |
40%|████ | 8/20 [00:38<01:08, 5.74s/it]
|
8 |
45%|████▌ | 9/20 [00:43<00:59, 5.38s/it]
|
9 |
50%|█████ | 10/20 [00:44<00:42, 4.23s/it]
|
10 |
55%|█████▌ | 11/20 [00:48<00:37, 4.13s/it]
|
11 |
60%|██████ | 12/20 [00:50<00:26, 3.34s/it]
|
12 |
65%|██████▌ | 13/20 [00:52<00:20, 2.97s/it]
|
13 |
70%|███████ | 14/20 [01:08<00:41, 6.89s/it]
|
14 |
75%|███████▌ | 15/20 [01:09<00:25, 5.02s/it]
|
15 |
80%|████████ | 16/20 [01:21<00:28, 7.22s/it]
|
16 |
85%|████████▌ | 17/20 [01:26<00:19, 6.50s/it]
|
17 |
90%|█████████ | 18/20 [01:32<00:13, 6.54s/it]
|
18 |
95%|█████████▌| 19/20 [01:39<00:06, 6.54s/it]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[2023-12-23 01:04:26,606] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
|
2 |
+
/root/miniconda3/envs/textgen/lib/python3.10/site-packages/trl/trainer/ppo_config.py:141: UserWarning: The `optimize_cuda_cache` arguement will be deprecated soon, please use `optimize_device_cache` instead.
|
3 |
+
warnings.warn(
|
4 |
+
12/23/2023 01:04:33 - WARNING - llmtuner.model.parser - `ddp_find_unused_parameters` needs to be set as False for LoRA in DDP training.
|
5 |
+
/root/miniconda3/envs/textgen/lib/python3.10/site-packages/transformers/training_args.py:1751: FutureWarning: `--push_to_hub_token` is deprecated and will be removed in version 5 of 🤗 Transformers. Use `--hub_token` instead.
|
6 |
+
warnings.warn(
|
7 |
+
12/23/2023 01:04:33 - INFO - llmtuner.model.parser - Process rank: 0, device: cuda:0, n_gpu: 1
|
8 |
+
distributed training: True, compute dtype: None
|
9 |
+
12/23/2023 01:04:33 - INFO - llmtuner.model.parser - Training/evaluation parameters Seq2SeqTrainingArguments(
|
10 |
+
_n_gpu=1,
|
11 |
+
adafactor=False,
|
12 |
+
adam_beta1=0.9,
|
13 |
+
adam_beta2=0.999,
|
14 |
+
adam_epsilon=1e-08,
|
15 |
+
auto_find_batch_size=False,
|
16 |
+
bf16=False,
|
17 |
+
bf16_full_eval=False,
|
18 |
+
data_seed=None,
|
19 |
+
dataloader_drop_last=False,
|
20 |
+
dataloader_num_workers=0,
|
21 |
+
dataloader_persistent_workers=False,
|
22 |
+
dataloader_pin_memory=True,
|
23 |
+
ddp_backend=None,
|
24 |
+
ddp_broadcast_buffers=None,
|
25 |
+
ddp_bucket_cap_mb=None,
|
26 |
+
ddp_find_unused_parameters=False,
|
27 |
+
ddp_timeout=1800,
|
28 |
+
debug=[],
|
29 |
+
deepspeed=None,
|
30 |
+
disable_tqdm=False,
|
31 |
+
dispatch_batches=None,
|
32 |
+
do_eval=False,
|
33 |
+
do_predict=True,
|
34 |
+
do_train=False,
|
35 |
+
eval_accumulation_steps=None,
|
36 |
+
eval_delay=0,
|
37 |
+
eval_steps=None,
|
38 |
+
evaluation_strategy=no,
|
39 |
+
fp16=False,
|
40 |
+
fp16_backend=auto,
|
41 |
+
fp16_full_eval=False,
|
42 |
+
fp16_opt_level=O1,
|
43 |
+
fsdp=[],
|
44 |
+
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_grad_ckpt': False},
|
45 |
+
fsdp_min_num_params=0,
|
46 |
+
fsdp_transformer_layer_cls_to_wrap=None,
|
47 |
+
full_determinism=False,
|
48 |
+
generation_config=None,
|
49 |
+
generation_max_length=None,
|
50 |
+
generation_num_beams=None,
|
51 |
+
gradient_accumulation_steps=1,
|
52 |
+
gradient_checkpointing=False,
|
53 |
+
gradient_checkpointing_kwargs=None,
|
54 |
+
greater_is_better=None,
|
55 |
+
group_by_length=False,
|
56 |
+
half_precision_backend=auto,
|
57 |
+
hub_always_push=False,
|
58 |
+
hub_model_id=None,
|
59 |
+
hub_private_repo=False,
|
60 |
+
hub_strategy=every_save,
|
61 |
+
hub_token=<HUB_TOKEN>,
|
62 |
+
ignore_data_skip=False,
|
63 |
+
include_inputs_for_metrics=False,
|
64 |
+
include_num_input_tokens_seen=False,
|
65 |
+
include_tokens_per_second=False,
|
66 |
+
jit_mode_eval=False,
|
67 |
+
label_names=None,
|
68 |
+
label_smoothing_factor=0.0,
|
69 |
+
learning_rate=5e-05,
|
70 |
+
length_column_name=length,
|
71 |
+
load_best_model_at_end=False,
|
72 |
+
local_rank=0,
|
73 |
+
log_level=passive,
|
74 |
+
log_level_replica=warning,
|
75 |
+
log_on_each_node=True,
|
76 |
+
logging_dir=./models/dpo/phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora-2nd/Predict_20/runs/Dec23_01-04-33_autodl-container-e8c311843c-66abc214,
|
77 |
+
logging_first_step=False,
|
78 |
+
logging_nan_inf_filter=True,
|
79 |
+
logging_steps=500,
|
80 |
+
logging_strategy=steps,
|
81 |
+
lr_scheduler_kwargs={},
|
82 |
+
lr_scheduler_type=linear,
|
83 |
+
max_grad_norm=1.0,
|
84 |
+
max_steps=-1,
|
85 |
+
metric_for_best_model=None,
|
86 |
+
mp_parameters=,
|
87 |
+
neftune_noise_alpha=None,
|
88 |
+
no_cuda=False,
|
89 |
+
num_train_epochs=3.0,
|
90 |
+
optim=adamw_torch,
|
91 |
+
optim_args=None,
|
92 |
+
output_dir=./models/dpo/phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora-2nd/Predict_20,
|
93 |
+
overwrite_output_dir=False,
|
94 |
+
past_index=-1,
|
95 |
+
per_device_eval_batch_size=1,
|
96 |
+
per_device_train_batch_size=8,
|
97 |
+
predict_with_generate=True,
|
98 |
+
prediction_loss_only=False,
|
99 |
+
push_to_hub=False,
|
100 |
+
push_to_hub_model_id=None,
|
101 |
+
push_to_hub_organization=None,
|
102 |
+
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
|
103 |
+
ray_scope=last,
|
104 |
+
remove_unused_columns=True,
|
105 |
+
report_to=['tensorboard', 'wandb'],
|
106 |
+
resume_from_checkpoint=None,
|
107 |
+
run_name=./models/dpo/phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora-2nd/Predict_20,
|
108 |
+
save_on_each_node=False,
|
109 |
+
save_only_model=False,
|
110 |
+
save_safetensors=True,
|
111 |
+
save_steps=500,
|
112 |
+
save_strategy=steps,
|
113 |
+
save_total_limit=None,
|
114 |
+
seed=42,
|
115 |
+
skip_memory_metrics=True,
|
116 |
+
sortish_sampler=False,
|
117 |
+
split_batches=False,
|
118 |
+
tf32=None,
|
119 |
+
torch_compile=False,
|
120 |
+
torch_compile_backend=None,
|
121 |
+
torch_compile_mode=None,
|
122 |
+
torchdynamo=None,
|
123 |
+
tpu_metrics_debug=False,
|
124 |
+
tpu_num_cores=None,
|
125 |
+
use_cpu=False,
|
126 |
+
use_ipex=False,
|
127 |
+
use_legacy_prediction_loop=False,
|
128 |
+
use_mps_device=False,
|
129 |
+
warmup_ratio=0.0,
|
130 |
+
warmup_steps=0,
|
131 |
+
weight_decay=0.0,
|
132 |
+
)
|
133 |
+
12/23/2023 01:04:33 - INFO - llmtuner.data.loader - Loading dataset alpaca_gpt4_data_en.json...
|
134 |
+
[WARNING|logging.py:314] 2023-12-23 01:04:34,195 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
135 |
+
|
136 |
+
12/23/2023 01:04:34 - INFO - llmtuner.model.adapter - Fine-tuning method: LoRA
|
137 |
+
12/23/2023 01:04:37 - INFO - llmtuner.model.adapter - Merged 1 adapter(s).
|
138 |
+
12/23/2023 01:04:37 - INFO - llmtuner.model.adapter - Loaded adapter(s): ./models/dpo/phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora-2nd
|
139 |
+
12/23/2023 01:04:37 - INFO - llmtuner.model.loader - trainable params: 0 || all params: 2779683840 || trainable%: 0.0000
|
140 |
+
12/23/2023 01:04:37 - INFO - llmtuner.model.loader - This IS expected that the trainable params is 0 if you are using model for inference only.
|
141 |
+
12/23/2023 01:04:37 - INFO - llmtuner.data.template - Add pad token: <|endoftext|>
|
142 |
+
|
143 |
+
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
|
144 |
+
input_ids:
|
145 |
+
[32, 8537, 1022, 257, 11040, 2836, 290, 281, 11666, 4430, 8796, 13, 383, 8796, 3607, 7613, 11, 6496, 11, 290, 23507, 7429, 284, 262, 2836, 338, 2683, 13, 198, 20490, 25, 13786, 1115, 9040, 329, 10589, 5448, 13, 198, 48902, 25]
|
146 |
+
inputs:
|
147 |
+
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
|
148 |
+
Human: Give three tips for staying healthy.
|
149 |
+
Assistant:
|
150 |
+
|
151 |
0%| | 0/20 [00:00<?, ?it/s]
|
152 |
10%|█ | 2/20 [00:01<00:15, 1.16it/s]
|
153 |
15%|█▌ | 3/20 [00:06<00:45, 2.70s/it]
|
154 |
20%|██ | 4/20 [00:16<01:25, 5.31s/it]
|
155 |
25%|██▌ | 5/20 [00:25<01:36, 6.46s/it]
|
156 |
30%|███ | 6/20 [00:26<01:04, 4.61s/it]
|
157 |
35%|███▌ | 7/20 [00:29<00:55, 4.29s/it]
|
158 |
40%|████ | 8/20 [00:38<01:08, 5.74s/it]
|
159 |
45%|████▌ | 9/20 [00:43<00:59, 5.38s/it]
|
160 |
50%|█████ | 10/20 [00:44<00:42, 4.23s/it]
|
161 |
55%|█████▌ | 11/20 [00:48<00:37, 4.13s/it]
|
162 |
60%|██████ | 12/20 [00:50<00:26, 3.34s/it]
|
163 |
65%|██████▌ | 13/20 [00:52<00:20, 2.97s/it]
|
164 |
70%|███████ | 14/20 [01:08<00:41, 6.89s/it]
|
165 |
75%|███████▌ | 15/20 [01:09<00:25, 5.02s/it]
|
166 |
80%|████████ | 16/20 [01:21<00:28, 7.22s/it]
|
167 |
85%|████████▌ | 17/20 [01:26<00:19, 6.50s/it]
|
168 |
90%|█████████ | 18/20 [01:32<00:13, 6.54s/it]
|
169 |
95%|█████████▌| 19/20 [01:39<00:06, 6.54s/it]
|
170 |
+
Loading model from cache /tmp/jieba.cache
|
171 |
+
Loading model cost 0.703 seconds.
|
172 |
+
Prefix dict has been built successfully.
|
173 |
+
|
174 |
+
***** predict metrics *****
|
175 |
+
predict_bleu-4 = 44.3112
|
176 |
+
predict_rouge-1 = 51.57
|
177 |
+
predict_rouge-2 = 30.4331
|
178 |
+
predict_rouge-l = 36.0243
|
179 |
+
predict_runtime = 0:01:49.52
|
180 |
+
predict_samples_per_second = 0.183
|
181 |
+
predict_steps_per_second = 0.183
|
182 |
+
12/23/2023 01:06:28 - INFO - llmtuner.train.sft.trainer - Saving prediction results to ./models/dpo/phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora-2nd/Predict_20/generated_predictions.jsonl
|
Predict_20/predict_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_bleu-4": 44.311245,
|
3 |
+
"predict_rouge-1": 51.570029999999996,
|
4 |
+
"predict_rouge-2": 30.433114999999997,
|
5 |
+
"predict_rouge-l": 36.02434000000001,
|
6 |
+
"predict_runtime": 109.5257,
|
7 |
+
"predict_samples_per_second": 0.183,
|
8 |
+
"predict_steps_per_second": 0.183
|
9 |
+
}
|
README.md
CHANGED
@@ -2,4 +2,78 @@
|
|
2 |
license: other
|
3 |
license_name: microsoft-research-license
|
4 |
license_link: https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: other
|
3 |
license_name: microsoft-research-license
|
4 |
license_link: https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE
|
5 |
+
library_name: peft
|
6 |
+
tags:
|
7 |
+
- llama-factory
|
8 |
+
- lora
|
9 |
+
- generated_from_trainer
|
10 |
+
base_model: ./models/phi-2-sft-alpaca_gpt4_en-ep1
|
11 |
+
model-index:
|
12 |
+
- name: phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora
|
13 |
+
results: []
|
14 |
---
|
15 |
+
|
16 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
17 |
+
should probably proofread and complete it, then remove this comment. -->
|
18 |
+
|
19 |
+
# phi-2-sft-alpaca_gpt4_en-ep1-dpo-comparison_gpt4_en-ep1-lora
|
20 |
+
|
21 |
+
This model is a fine-tuned version of [./models/phi-2-sft-alpaca_gpt4_en-ep1](https://huggingface.co/./models/phi-2-sft-alpaca_gpt4_en-ep1) on the comparison_gpt4_en dataset.
|
22 |
+
It achieves the following results on the evaluation set:
|
23 |
+
- Loss: 0.0168
|
24 |
+
- Rewards/chosen: -1.5750
|
25 |
+
- Rewards/rejected: -11.4002
|
26 |
+
- Rewards/accuracies: 0.9956
|
27 |
+
- Rewards/margins: 9.8253
|
28 |
+
- Logps/rejected: -142.2352
|
29 |
+
- Logps/chosen: -139.5300
|
30 |
+
- Logits/rejected: 0.6066
|
31 |
+
- Logits/chosen: 0.9744
|
32 |
+
|
33 |
+
## Model description
|
34 |
+
|
35 |
+
More information needed
|
36 |
+
|
37 |
+
## Intended uses & limitations
|
38 |
+
|
39 |
+
More information needed
|
40 |
+
|
41 |
+
## Training and evaluation data
|
42 |
+
|
43 |
+
More information needed
|
44 |
+
|
45 |
+
## Training procedure
|
46 |
+
|
47 |
+
### Training hyperparameters
|
48 |
+
|
49 |
+
The following hyperparameters were used during training:
|
50 |
+
- learning_rate: 5e-05
|
51 |
+
- train_batch_size: 1
|
52 |
+
- eval_batch_size: 1
|
53 |
+
- seed: 42
|
54 |
+
- distributed_type: multi-GPU
|
55 |
+
- num_devices: 2
|
56 |
+
- gradient_accumulation_steps: 4
|
57 |
+
- total_train_batch_size: 8
|
58 |
+
- total_eval_batch_size: 2
|
59 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
60 |
+
- lr_scheduler_type: cosine
|
61 |
+
- num_epochs: 1.0
|
62 |
+
|
63 |
+
### Training results
|
64 |
+
|
65 |
+
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
66 |
+
|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
67 |
+
| 0.0534 | 0.24 | 1000 | 0.0217 | -1.6714 | -10.2359 | 0.9945 | 8.5645 | -130.5921 | -140.4941 | 0.3064 | 0.5735 |
|
68 |
+
| 0.0182 | 0.49 | 2000 | 0.0175 | -1.5469 | -10.9602 | 0.9951 | 9.4133 | -137.8349 | -139.2487 | 0.6230 | 1.0709 |
|
69 |
+
| 0.0162 | 0.73 | 3000 | 0.0171 | -1.5517 | -11.4444 | 0.9962 | 9.8927 | -142.6772 | -139.2976 | 0.6325 | 1.0048 |
|
70 |
+
| 0.0154 | 0.98 | 4000 | 0.0168 | -1.5741 | -11.4004 | 0.9956 | 9.8262 | -142.2364 | -139.5214 | 0.6051 | 0.9729 |
|
71 |
+
|
72 |
+
|
73 |
+
### Framework versions
|
74 |
+
|
75 |
+
- PEFT 0.7.1
|
76 |
+
- Transformers 4.36.2
|
77 |
+
- Pytorch 2.0.1+cu117
|
78 |
+
- Datasets 2.14.5
|
79 |
+
- Tokenizers 0.15.0
|
adapter_config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/phi-2-sft-alpaca_gpt4_en-ep1",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 16,
|
13 |
+
"lora_dropout": 0.1,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": null,
|
17 |
+
"peft_type": "LORA",
|
18 |
+
"r": 8,
|
19 |
+
"rank_pattern": {},
|
20 |
+
"revision": null,
|
21 |
+
"target_modules": [
|
22 |
+
"Wqkv"
|
23 |
+
],
|
24 |
+
"task_type": "CAUSAL_LM"
|
25 |
+
}
|
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c225b0a6556517e2036e5fd8cfa910db8bf243f491ecd5908717b7d0b9cb4d67
|
3 |
+
size 10493896
|
added_tokens.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\t\t": 50294,
|
3 |
+
"\t\t\t": 50293,
|
4 |
+
"\t\t\t\t": 50292,
|
5 |
+
"\t\t\t\t\t": 50291,
|
6 |
+
"\t\t\t\t\t\t": 50290,
|
7 |
+
"\t\t\t\t\t\t\t": 50289,
|
8 |
+
"\t\t\t\t\t\t\t\t": 50288,
|
9 |
+
"\t\t\t\t\t\t\t\t\t": 50287,
|
10 |
+
" ": 50286,
|
11 |
+
" ": 50285,
|
12 |
+
" ": 50284,
|
13 |
+
" ": 50283,
|
14 |
+
" ": 50282,
|
15 |
+
" ": 50281,
|
16 |
+
" ": 50280,
|
17 |
+
" ": 50279,
|
18 |
+
" ": 50278,
|
19 |
+
" ": 50277,
|
20 |
+
" ": 50276,
|
21 |
+
" ": 50275,
|
22 |
+
" ": 50274,
|
23 |
+
" ": 50273,
|
24 |
+
" ": 50272,
|
25 |
+
" ": 50271,
|
26 |
+
" ": 50270,
|
27 |
+
" ": 50269,
|
28 |
+
" ": 50268,
|
29 |
+
" ": 50267,
|
30 |
+
" ": 50266,
|
31 |
+
" ": 50265,
|
32 |
+
" ": 50264,
|
33 |
+
" ": 50263,
|
34 |
+
" ": 50262,
|
35 |
+
" ": 50261,
|
36 |
+
" ": 50260,
|
37 |
+
" ": 50259,
|
38 |
+
" ": 50258,
|
39 |
+
" ": 50257
|
40 |
+
}
|
all_results.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_logits/chosen": 0.9743706583976746,
|
4 |
+
"eval_logits/rejected": 0.6065909266471863,
|
5 |
+
"eval_logps/chosen": -139.52996826171875,
|
6 |
+
"eval_logps/rejected": -142.2351837158203,
|
7 |
+
"eval_loss": 0.01678086631000042,
|
8 |
+
"eval_rewards/accuracies": 0.9956116080284119,
|
9 |
+
"eval_rewards/chosen": -1.5749770402908325,
|
10 |
+
"eval_rewards/margins": 9.825251579284668,
|
11 |
+
"eval_rewards/rejected": -11.400227546691895,
|
12 |
+
"eval_runtime": 379.7007,
|
13 |
+
"eval_samples_per_second": 9.6,
|
14 |
+
"eval_steps_per_second": 4.801,
|
15 |
+
"train_loss": 0.025599888430248966,
|
16 |
+
"train_runtime": 6987.638,
|
17 |
+
"train_samples_per_second": 4.693,
|
18 |
+
"train_steps_per_second": 0.587
|
19 |
+
}
|
checkpoint-1000/README.md
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: ./models/phi-2-sft-alpaca_gpt4_en-ep1
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
|
201 |
+
|
202 |
+
### Framework versions
|
203 |
+
|
204 |
+
- PEFT 0.7.1
|
checkpoint-1000/adapter_config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/phi-2-sft-alpaca_gpt4_en-ep1",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 16,
|
13 |
+
"lora_dropout": 0.1,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": null,
|
17 |
+
"peft_type": "LORA",
|
18 |
+
"r": 8,
|
19 |
+
"rank_pattern": {},
|
20 |
+
"revision": null,
|
21 |
+
"target_modules": [
|
22 |
+
"Wqkv"
|
23 |
+
],
|
24 |
+
"task_type": "CAUSAL_LM"
|
25 |
+
}
|
checkpoint-1000/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:29bfbae441ccac7f3a5e01a1b0286976be920972d0ace07fd6d47b800415fef3
|
3 |
+
size 10493896
|
checkpoint-1000/added_tokens.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\t\t": 50294,
|
3 |
+
"\t\t\t": 50293,
|
4 |
+
"\t\t\t\t": 50292,
|
5 |
+
"\t\t\t\t\t": 50291,
|
6 |
+
"\t\t\t\t\t\t": 50290,
|
7 |
+
"\t\t\t\t\t\t\t": 50289,
|
8 |
+
"\t\t\t\t\t\t\t\t": 50288,
|
9 |
+
"\t\t\t\t\t\t\t\t\t": 50287,
|
10 |
+
" ": 50286,
|
11 |
+
" ": 50285,
|
12 |
+
" ": 50284,
|
13 |
+
" ": 50283,
|
14 |
+
" ": 50282,
|
15 |
+
" ": 50281,
|
16 |
+
" ": 50280,
|
17 |
+
" ": 50279,
|
18 |
+
" ": 50278,
|
19 |
+
" ": 50277,
|
20 |
+
" ": 50276,
|
21 |
+
" ": 50275,
|
22 |
+
" ": 50274,
|
23 |
+
" ": 50273,
|
24 |
+
" ": 50272,
|
25 |
+
" ": 50271,
|
26 |
+
" ": 50270,
|
27 |
+
" ": 50269,
|
28 |
+
" ": 50268,
|
29 |
+
" ": 50267,
|
30 |
+
" ": 50266,
|
31 |
+
" ": 50265,
|
32 |
+
" ": 50264,
|
33 |
+
" ": 50263,
|
34 |
+
" ": 50262,
|
35 |
+
" ": 50261,
|
36 |
+
" ": 50260,
|
37 |
+
" ": 50259,
|
38 |
+
" ": 50258,
|
39 |
+
" ": 50257
|
40 |
+
}
|
checkpoint-1000/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-1000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20e151647d64f9e2121578f521c98b7408f54267dcd5827edd9178757e795a7f
|
3 |
+
size 21025157
|
checkpoint-1000/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff49e9fd7e8b782779b5a9b561ac7a88294f19be62c93dbc40ca668ebb70f03
|
3 |
+
size 15607
|
checkpoint-1000/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d84f3e88e80f41ffd1056b31a40d0a1847fb2899262c9576d595c53d8aa2554
|
3 |
+
size 15607
|
checkpoint-1000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0d185b99a927b18d6545576313ca8230e6da67abf1d57f5c2743fb2e94742e3a
|
3 |
+
size 627
|
checkpoint-1000/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-1000/tokenizer_config.json
ADDED
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"50256": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"50257": {
|
14 |
+
"content": " ",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": false
|
20 |
+
},
|
21 |
+
"50258": {
|
22 |
+
"content": " ",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": false
|
28 |
+
},
|
29 |
+
"50259": {
|
30 |
+
"content": " ",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": true,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": false
|
36 |
+
},
|
37 |
+
"50260": {
|
38 |
+
"content": " ",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": false
|
44 |
+
},
|
45 |
+
"50261": {
|
46 |
+
"content": " ",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": true,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": false
|
52 |
+
},
|
53 |
+
"50262": {
|
54 |
+
"content": " ",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": true,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": false
|
60 |
+
},
|
61 |
+
"50263": {
|
62 |
+
"content": " ",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": true,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": false
|
68 |
+
},
|
69 |
+
"50264": {
|
70 |
+
"content": " ",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": true,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": false
|
76 |
+
},
|
77 |
+
"50265": {
|
78 |
+
"content": " ",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": true,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": false
|
84 |
+
},
|
85 |
+
"50266": {
|
86 |
+
"content": " ",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": true,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": false
|
92 |
+
},
|
93 |
+
"50267": {
|
94 |
+
"content": " ",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": true,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": false
|
100 |
+
},
|
101 |
+
"50268": {
|
102 |
+
"content": " ",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": true,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": false
|
108 |
+
},
|
109 |
+
"50269": {
|
110 |
+
"content": " ",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": true,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": false
|
116 |
+
},
|
117 |
+
"50270": {
|
118 |
+
"content": " ",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": true,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": false
|
124 |
+
},
|
125 |
+
"50271": {
|
126 |
+
"content": " ",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": true,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"50272": {
|
134 |
+
"content": " ",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": true,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"50273": {
|
142 |
+
"content": " ",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": true,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
+
},
|
149 |
+
"50274": {
|
150 |
+
"content": " ",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": true,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": false
|
156 |
+
},
|
157 |
+
"50275": {
|
158 |
+
"content": " ",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": true,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": false
|
164 |
+
},
|
165 |
+
"50276": {
|
166 |
+
"content": " ",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": true,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": false
|
172 |
+
},
|
173 |
+
"50277": {
|
174 |
+
"content": " ",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": true,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": false
|
180 |
+
},
|
181 |
+
"50278": {
|
182 |
+
"content": " ",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": true,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": false
|
188 |
+
},
|
189 |
+
"50279": {
|
190 |
+
"content": " ",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": true,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": false
|
196 |
+
},
|
197 |
+
"50280": {
|
198 |
+
"content": " ",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": true,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": false
|
204 |
+
},
|
205 |
+
"50281": {
|
206 |
+
"content": " ",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": true,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": false
|
212 |
+
},
|
213 |
+
"50282": {
|
214 |
+
"content": " ",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": true,
|
217 |
+
"rstrip": false,
|
218 |
+
"single_word": false,
|
219 |
+
"special": false
|
220 |
+
},
|
221 |
+
"50283": {
|
222 |
+
"content": " ",
|
223 |
+
"lstrip": false,
|
224 |
+
"normalized": true,
|
225 |
+
"rstrip": false,
|
226 |
+
"single_word": false,
|
227 |
+
"special": false
|
228 |
+
},
|
229 |
+
"50284": {
|
230 |
+
"content": " ",
|
231 |
+
"lstrip": false,
|
232 |
+
"normalized": true,
|
233 |
+
"rstrip": false,
|
234 |
+
"single_word": false,
|
235 |
+
"special": false
|
236 |
+
},
|
237 |
+
"50285": {
|
238 |
+
"content": " ",
|
239 |
+
"lstrip": false,
|
240 |
+
"normalized": true,
|
241 |
+
"rstrip": false,
|
242 |
+
"single_word": false,
|
243 |
+
"special": false
|
244 |
+
},
|
245 |
+
"50286": {
|
246 |
+
"content": " ",
|
247 |
+
"lstrip": false,
|
248 |
+
"normalized": true,
|
249 |
+
"rstrip": false,
|
250 |
+
"single_word": false,
|
251 |
+
"special": false
|
252 |
+
},
|
253 |
+
"50287": {
|
254 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
255 |
+
"lstrip": false,
|
256 |
+
"normalized": true,
|
257 |
+
"rstrip": false,
|
258 |
+
"single_word": false,
|
259 |
+
"special": false
|
260 |
+
},
|
261 |
+
"50288": {
|
262 |
+
"content": "\t\t\t\t\t\t\t\t",
|
263 |
+
"lstrip": false,
|
264 |
+
"normalized": true,
|
265 |
+
"rstrip": false,
|
266 |
+
"single_word": false,
|
267 |
+
"special": false
|
268 |
+
},
|
269 |
+
"50289": {
|
270 |
+
"content": "\t\t\t\t\t\t\t",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": true,
|
273 |
+
"rstrip": false,
|
274 |
+
"single_word": false,
|
275 |
+
"special": false
|
276 |
+
},
|
277 |
+
"50290": {
|
278 |
+
"content": "\t\t\t\t\t\t",
|
279 |
+
"lstrip": false,
|
280 |
+
"normalized": true,
|
281 |
+
"rstrip": false,
|
282 |
+
"single_word": false,
|
283 |
+
"special": false
|
284 |
+
},
|
285 |
+
"50291": {
|
286 |
+
"content": "\t\t\t\t\t",
|
287 |
+
"lstrip": false,
|
288 |
+
"normalized": true,
|
289 |
+
"rstrip": false,
|
290 |
+
"single_word": false,
|
291 |
+
"special": false
|
292 |
+
},
|
293 |
+
"50292": {
|
294 |
+
"content": "\t\t\t\t",
|
295 |
+
"lstrip": false,
|
296 |
+
"normalized": true,
|
297 |
+
"rstrip": false,
|
298 |
+
"single_word": false,
|
299 |
+
"special": false
|
300 |
+
},
|
301 |
+
"50293": {
|
302 |
+
"content": "\t\t\t",
|
303 |
+
"lstrip": false,
|
304 |
+
"normalized": true,
|
305 |
+
"rstrip": false,
|
306 |
+
"single_word": false,
|
307 |
+
"special": false
|
308 |
+
},
|
309 |
+
"50294": {
|
310 |
+
"content": "\t\t",
|
311 |
+
"lstrip": false,
|
312 |
+
"normalized": true,
|
313 |
+
"rstrip": false,
|
314 |
+
"single_word": false,
|
315 |
+
"special": false
|
316 |
+
}
|
317 |
+
},
|
318 |
+
"bos_token": "<|endoftext|>",
|
319 |
+
"clean_up_tokenization_spaces": true,
|
320 |
+
"eos_token": "<|endoftext|>",
|
321 |
+
"errors": "replace",
|
322 |
+
"model_max_length": 2048,
|
323 |
+
"pad_token": "<|endoftext|>",
|
324 |
+
"padding_side": "right",
|
325 |
+
"split_special_tokens": false,
|
326 |
+
"tokenizer_class": "CodeGenTokenizer",
|
327 |
+
"unk_token": "<|endoftext|>"
|
328 |
+
}
|
checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.24393218685205512,
|
5 |
+
"eval_steps": 1000,
|
6 |
+
"global_step": 1000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.24,
|
13 |
+
"learning_rate": 4.30097988740375e-05,
|
14 |
+
"logits/chosen": 0.4760148227214813,
|
15 |
+
"logits/rejected": 0.18961936235427856,
|
16 |
+
"logps/chosen": -138.3477783203125,
|
17 |
+
"logps/rejected": -107.43299865722656,
|
18 |
+
"loss": 0.0534,
|
19 |
+
"rewards/accuracies": 0.9904999732971191,
|
20 |
+
"rewards/chosen": -1.208303689956665,
|
21 |
+
"rewards/margins": 6.7370734214782715,
|
22 |
+
"rewards/rejected": -7.945377349853516,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"eval_logits/chosen": 0.5734850168228149,
|
28 |
+
"eval_logits/rejected": 0.30642059445381165,
|
29 |
+
"eval_logps/chosen": -140.49411010742188,
|
30 |
+
"eval_logps/rejected": -130.59207153320312,
|
31 |
+
"eval_loss": 0.021729281172156334,
|
32 |
+
"eval_rewards/accuracies": 0.994514524936676,
|
33 |
+
"eval_rewards/chosen": -1.671390414237976,
|
34 |
+
"eval_rewards/margins": 8.56452751159668,
|
35 |
+
"eval_rewards/rejected": -10.235918045043945,
|
36 |
+
"eval_runtime": 379.8081,
|
37 |
+
"eval_samples_per_second": 9.597,
|
38 |
+
"eval_steps_per_second": 4.8,
|
39 |
+
"step": 1000
|
40 |
+
}
|
41 |
+
],
|
42 |
+
"logging_steps": 1000,
|
43 |
+
"max_steps": 4099,
|
44 |
+
"num_input_tokens_seen": 0,
|
45 |
+
"num_train_epochs": 1,
|
46 |
+
"save_steps": 1000,
|
47 |
+
"total_flos": 4.588610344556954e+16,
|
48 |
+
"train_batch_size": 1,
|
49 |
+
"trial_name": null,
|
50 |
+
"trial_params": null
|
51 |
+
}
|
checkpoint-1000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e1e080eaa2d6e4dd6be714c1ceaa538ec3b328a23e36d3aded3c04c0da7f56e
|
3 |
+
size 4539
|
checkpoint-1000/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2000/README.md
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: ./models/phi-2-sft-alpaca_gpt4_en-ep1
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
|
201 |
+
|
202 |
+
### Framework versions
|
203 |
+
|
204 |
+
- PEFT 0.7.1
|
checkpoint-2000/adapter_config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/phi-2-sft-alpaca_gpt4_en-ep1",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 16,
|
13 |
+
"lora_dropout": 0.1,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": null,
|
17 |
+
"peft_type": "LORA",
|
18 |
+
"r": 8,
|
19 |
+
"rank_pattern": {},
|
20 |
+
"revision": null,
|
21 |
+
"target_modules": [
|
22 |
+
"Wqkv"
|
23 |
+
],
|
24 |
+
"task_type": "CAUSAL_LM"
|
25 |
+
}
|
checkpoint-2000/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5fb2682bfeb55732bed0425fbd15e3d307ae3b46a09c7d02a95804160cc578c
|
3 |
+
size 10493896
|
checkpoint-2000/added_tokens.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\t\t": 50294,
|
3 |
+
"\t\t\t": 50293,
|
4 |
+
"\t\t\t\t": 50292,
|
5 |
+
"\t\t\t\t\t": 50291,
|
6 |
+
"\t\t\t\t\t\t": 50290,
|
7 |
+
"\t\t\t\t\t\t\t": 50289,
|
8 |
+
"\t\t\t\t\t\t\t\t": 50288,
|
9 |
+
"\t\t\t\t\t\t\t\t\t": 50287,
|
10 |
+
" ": 50286,
|
11 |
+
" ": 50285,
|
12 |
+
" ": 50284,
|
13 |
+
" ": 50283,
|
14 |
+
" ": 50282,
|
15 |
+
" ": 50281,
|
16 |
+
" ": 50280,
|
17 |
+
" ": 50279,
|
18 |
+
" ": 50278,
|
19 |
+
" ": 50277,
|
20 |
+
" ": 50276,
|
21 |
+
" ": 50275,
|
22 |
+
" ": 50274,
|
23 |
+
" ": 50273,
|
24 |
+
" ": 50272,
|
25 |
+
" ": 50271,
|
26 |
+
" ": 50270,
|
27 |
+
" ": 50269,
|
28 |
+
" ": 50268,
|
29 |
+
" ": 50267,
|
30 |
+
" ": 50266,
|
31 |
+
" ": 50265,
|
32 |
+
" ": 50264,
|
33 |
+
" ": 50263,
|
34 |
+
" ": 50262,
|
35 |
+
" ": 50261,
|
36 |
+
" ": 50260,
|
37 |
+
" ": 50259,
|
38 |
+
" ": 50258,
|
39 |
+
" ": 50257
|
40 |
+
}
|
checkpoint-2000/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3906ad35d7efa506ebe14675c01a9dcb05a5a85815111f09a6c44dff7d217861
|
3 |
+
size 21025157
|
checkpoint-2000/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cced889bd219811a4aa198fe40ea1baed7c90c6e40af3fac42a460b7ba5b033
|
3 |
+
size 15607
|
checkpoint-2000/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8633b35217301daf5072d5fa06b71e97db8c33240e5727f3abaa69130af343bb
|
3 |
+
size 15607
|
checkpoint-2000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee4b2372ff7007578bc536422e1b403d0fc6d4aff011249b2745e00c64d348e2
|
3 |
+
size 627
|
checkpoint-2000/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-2000/tokenizer_config.json
ADDED
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"50256": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"50257": {
|
14 |
+
"content": " ",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": false
|
20 |
+
},
|
21 |
+
"50258": {
|
22 |
+
"content": " ",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": false
|
28 |
+
},
|
29 |
+
"50259": {
|
30 |
+
"content": " ",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": true,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": false
|
36 |
+
},
|
37 |
+
"50260": {
|
38 |
+
"content": " ",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": false
|
44 |
+
},
|
45 |
+
"50261": {
|
46 |
+
"content": " ",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": true,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": false
|
52 |
+
},
|
53 |
+
"50262": {
|
54 |
+
"content": " ",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": true,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": false
|
60 |
+
},
|
61 |
+
"50263": {
|
62 |
+
"content": " ",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": true,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": false
|
68 |
+
},
|
69 |
+
"50264": {
|
70 |
+
"content": " ",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": true,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": false
|
76 |
+
},
|
77 |
+
"50265": {
|
78 |
+
"content": " ",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": true,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": false
|
84 |
+
},
|
85 |
+
"50266": {
|
86 |
+
"content": " ",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": true,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": false
|
92 |
+
},
|
93 |
+
"50267": {
|
94 |
+
"content": " ",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": true,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": false
|
100 |
+
},
|
101 |
+
"50268": {
|
102 |
+
"content": " ",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": true,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": false
|
108 |
+
},
|
109 |
+
"50269": {
|
110 |
+
"content": " ",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": true,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": false
|
116 |
+
},
|
117 |
+
"50270": {
|
118 |
+
"content": " ",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": true,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": false
|
124 |
+
},
|
125 |
+
"50271": {
|
126 |
+
"content": " ",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": true,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"50272": {
|
134 |
+
"content": " ",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": true,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"50273": {
|
142 |
+
"content": " ",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": true,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
+
},
|
149 |
+
"50274": {
|
150 |
+
"content": " ",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": true,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": false
|
156 |
+
},
|
157 |
+
"50275": {
|
158 |
+
"content": " ",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": true,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": false
|
164 |
+
},
|
165 |
+
"50276": {
|
166 |
+
"content": " ",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": true,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": false
|
172 |
+
},
|
173 |
+
"50277": {
|
174 |
+
"content": " ",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": true,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": false
|
180 |
+
},
|
181 |
+
"50278": {
|
182 |
+
"content": " ",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": true,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": false
|
188 |
+
},
|
189 |
+
"50279": {
|
190 |
+
"content": " ",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": true,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": false
|
196 |
+
},
|
197 |
+
"50280": {
|
198 |
+
"content": " ",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": true,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": false
|
204 |
+
},
|
205 |
+
"50281": {
|
206 |
+
"content": " ",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": true,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": false
|
212 |
+
},
|
213 |
+
"50282": {
|
214 |
+
"content": " ",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": true,
|
217 |
+
"rstrip": false,
|
218 |
+
"single_word": false,
|
219 |
+
"special": false
|
220 |
+
},
|
221 |
+
"50283": {
|
222 |
+
"content": " ",
|
223 |
+
"lstrip": false,
|
224 |
+
"normalized": true,
|
225 |
+
"rstrip": false,
|
226 |
+
"single_word": false,
|
227 |
+
"special": false
|
228 |
+
},
|
229 |
+
"50284": {
|
230 |
+
"content": " ",
|
231 |
+
"lstrip": false,
|
232 |
+
"normalized": true,
|
233 |
+
"rstrip": false,
|
234 |
+
"single_word": false,
|
235 |
+
"special": false
|
236 |
+
},
|
237 |
+
"50285": {
|
238 |
+
"content": " ",
|
239 |
+
"lstrip": false,
|
240 |
+
"normalized": true,
|
241 |
+
"rstrip": false,
|
242 |
+
"single_word": false,
|
243 |
+
"special": false
|
244 |
+
},
|
245 |
+
"50286": {
|
246 |
+
"content": " ",
|
247 |
+
"lstrip": false,
|
248 |
+
"normalized": true,
|
249 |
+
"rstrip": false,
|
250 |
+
"single_word": false,
|
251 |
+
"special": false
|
252 |
+
},
|
253 |
+
"50287": {
|
254 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
255 |
+
"lstrip": false,
|
256 |
+
"normalized": true,
|
257 |
+
"rstrip": false,
|
258 |
+
"single_word": false,
|
259 |
+
"special": false
|
260 |
+
},
|
261 |
+
"50288": {
|
262 |
+
"content": "\t\t\t\t\t\t\t\t",
|
263 |
+
"lstrip": false,
|
264 |
+
"normalized": true,
|
265 |
+
"rstrip": false,
|
266 |
+
"single_word": false,
|
267 |
+
"special": false
|
268 |
+
},
|
269 |
+
"50289": {
|
270 |
+
"content": "\t\t\t\t\t\t\t",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": true,
|
273 |
+
"rstrip": false,
|
274 |
+
"single_word": false,
|
275 |
+
"special": false
|
276 |
+
},
|
277 |
+
"50290": {
|
278 |
+
"content": "\t\t\t\t\t\t",
|
279 |
+
"lstrip": false,
|
280 |
+
"normalized": true,
|
281 |
+
"rstrip": false,
|
282 |
+
"single_word": false,
|
283 |
+
"special": false
|
284 |
+
},
|
285 |
+
"50291": {
|
286 |
+
"content": "\t\t\t\t\t",
|
287 |
+
"lstrip": false,
|
288 |
+
"normalized": true,
|
289 |
+
"rstrip": false,
|
290 |
+
"single_word": false,
|
291 |
+
"special": false
|
292 |
+
},
|
293 |
+
"50292": {
|
294 |
+
"content": "\t\t\t\t",
|
295 |
+
"lstrip": false,
|
296 |
+
"normalized": true,
|
297 |
+
"rstrip": false,
|
298 |
+
"single_word": false,
|
299 |
+
"special": false
|
300 |
+
},
|
301 |
+
"50293": {
|
302 |
+
"content": "\t\t\t",
|
303 |
+
"lstrip": false,
|
304 |
+
"normalized": true,
|
305 |
+
"rstrip": false,
|
306 |
+
"single_word": false,
|
307 |
+
"special": false
|
308 |
+
},
|
309 |
+
"50294": {
|
310 |
+
"content": "\t\t",
|
311 |
+
"lstrip": false,
|
312 |
+
"normalized": true,
|
313 |
+
"rstrip": false,
|
314 |
+
"single_word": false,
|
315 |
+
"special": false
|
316 |
+
}
|
317 |
+
},
|
318 |
+
"bos_token": "<|endoftext|>",
|
319 |
+
"clean_up_tokenization_spaces": true,
|
320 |
+
"eos_token": "<|endoftext|>",
|
321 |
+
"errors": "replace",
|
322 |
+
"model_max_length": 2048,
|
323 |
+
"pad_token": "<|endoftext|>",
|
324 |
+
"padding_side": "right",
|
325 |
+
"split_special_tokens": false,
|
326 |
+
"tokenizer_class": "CodeGenTokenizer",
|
327 |
+
"unk_token": "<|endoftext|>"
|
328 |
+
}
|
checkpoint-2000/trainer_state.json
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.48786437370411023,
|
5 |
+
"eval_steps": 1000,
|
6 |
+
"global_step": 2000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.24,
|
13 |
+
"learning_rate": 4.30097988740375e-05,
|
14 |
+
"logits/chosen": 0.4760148227214813,
|
15 |
+
"logits/rejected": 0.18961936235427856,
|
16 |
+
"logps/chosen": -138.3477783203125,
|
17 |
+
"logps/rejected": -107.43299865722656,
|
18 |
+
"loss": 0.0534,
|
19 |
+
"rewards/accuracies": 0.9904999732971191,
|
20 |
+
"rewards/chosen": -1.208303689956665,
|
21 |
+
"rewards/margins": 6.7370734214782715,
|
22 |
+
"rewards/rejected": -7.945377349853516,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"eval_logits/chosen": 0.5734850168228149,
|
28 |
+
"eval_logits/rejected": 0.30642059445381165,
|
29 |
+
"eval_logps/chosen": -140.49411010742188,
|
30 |
+
"eval_logps/rejected": -130.59207153320312,
|
31 |
+
"eval_loss": 0.021729281172156334,
|
32 |
+
"eval_rewards/accuracies": 0.994514524936676,
|
33 |
+
"eval_rewards/chosen": -1.671390414237976,
|
34 |
+
"eval_rewards/margins": 8.56452751159668,
|
35 |
+
"eval_rewards/rejected": -10.235918045043945,
|
36 |
+
"eval_runtime": 379.8081,
|
37 |
+
"eval_samples_per_second": 9.597,
|
38 |
+
"eval_steps_per_second": 4.8,
|
39 |
+
"step": 1000
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.49,
|
43 |
+
"learning_rate": 2.594822843866259e-05,
|
44 |
+
"logits/chosen": 0.9377780556678772,
|
45 |
+
"logits/rejected": 0.5601473450660706,
|
46 |
+
"logps/chosen": -137.52035522460938,
|
47 |
+
"logps/rejected": -132.42645263671875,
|
48 |
+
"loss": 0.0182,
|
49 |
+
"rewards/accuracies": 0.9955000281333923,
|
50 |
+
"rewards/chosen": -1.4818373918533325,
|
51 |
+
"rewards/margins": 8.971912384033203,
|
52 |
+
"rewards/rejected": -10.453749656677246,
|
53 |
+
"step": 2000
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 0.49,
|
57 |
+
"eval_logits/chosen": 1.070937156677246,
|
58 |
+
"eval_logits/rejected": 0.6229708194732666,
|
59 |
+
"eval_logps/chosen": -139.24871826171875,
|
60 |
+
"eval_logps/rejected": -137.83485412597656,
|
61 |
+
"eval_loss": 0.01748650148510933,
|
62 |
+
"eval_rewards/accuracies": 0.995063066482544,
|
63 |
+
"eval_rewards/chosen": -1.5468522310256958,
|
64 |
+
"eval_rewards/margins": 9.413342475891113,
|
65 |
+
"eval_rewards/rejected": -10.96019458770752,
|
66 |
+
"eval_runtime": 382.2871,
|
67 |
+
"eval_samples_per_second": 9.535,
|
68 |
+
"eval_steps_per_second": 4.769,
|
69 |
+
"step": 2000
|
70 |
+
}
|
71 |
+
],
|
72 |
+
"logging_steps": 1000,
|
73 |
+
"max_steps": 4099,
|
74 |
+
"num_input_tokens_seen": 0,
|
75 |
+
"num_train_epochs": 1,
|
76 |
+
"save_steps": 1000,
|
77 |
+
"total_flos": 9.05749503976407e+16,
|
78 |
+
"train_batch_size": 1,
|
79 |
+
"trial_name": null,
|
80 |
+
"trial_params": null
|
81 |
+
}
|
checkpoint-2000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e1e080eaa2d6e4dd6be714c1ceaa538ec3b328a23e36d3aded3c04c0da7f56e
|
3 |
+
size 4539
|
checkpoint-2000/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/README.md
ADDED
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: peft
|
3 |
+
base_model: ./models/phi-2-sft-alpaca_gpt4_en-ep1
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
|
201 |
+
|
202 |
+
### Framework versions
|
203 |
+
|
204 |
+
- PEFT 0.7.1
|
checkpoint-3000/adapter_config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"alpha_pattern": {},
|
3 |
+
"auto_mapping": null,
|
4 |
+
"base_model_name_or_path": "./models/phi-2-sft-alpaca_gpt4_en-ep1",
|
5 |
+
"bias": "none",
|
6 |
+
"fan_in_fan_out": false,
|
7 |
+
"inference_mode": true,
|
8 |
+
"init_lora_weights": true,
|
9 |
+
"layers_pattern": null,
|
10 |
+
"layers_to_transform": null,
|
11 |
+
"loftq_config": {},
|
12 |
+
"lora_alpha": 16,
|
13 |
+
"lora_dropout": 0.1,
|
14 |
+
"megatron_config": null,
|
15 |
+
"megatron_core": "megatron.core",
|
16 |
+
"modules_to_save": null,
|
17 |
+
"peft_type": "LORA",
|
18 |
+
"r": 8,
|
19 |
+
"rank_pattern": {},
|
20 |
+
"revision": null,
|
21 |
+
"target_modules": [
|
22 |
+
"Wqkv"
|
23 |
+
],
|
24 |
+
"task_type": "CAUSAL_LM"
|
25 |
+
}
|
checkpoint-3000/adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf12d71fe6e378021a1c7f7ad86970a743d7b58d57bf4ce689340ab5393dc2f8
|
3 |
+
size 10493896
|
checkpoint-3000/added_tokens.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"\t\t": 50294,
|
3 |
+
"\t\t\t": 50293,
|
4 |
+
"\t\t\t\t": 50292,
|
5 |
+
"\t\t\t\t\t": 50291,
|
6 |
+
"\t\t\t\t\t\t": 50290,
|
7 |
+
"\t\t\t\t\t\t\t": 50289,
|
8 |
+
"\t\t\t\t\t\t\t\t": 50288,
|
9 |
+
"\t\t\t\t\t\t\t\t\t": 50287,
|
10 |
+
" ": 50286,
|
11 |
+
" ": 50285,
|
12 |
+
" ": 50284,
|
13 |
+
" ": 50283,
|
14 |
+
" ": 50282,
|
15 |
+
" ": 50281,
|
16 |
+
" ": 50280,
|
17 |
+
" ": 50279,
|
18 |
+
" ": 50278,
|
19 |
+
" ": 50277,
|
20 |
+
" ": 50276,
|
21 |
+
" ": 50275,
|
22 |
+
" ": 50274,
|
23 |
+
" ": 50273,
|
24 |
+
" ": 50272,
|
25 |
+
" ": 50271,
|
26 |
+
" ": 50270,
|
27 |
+
" ": 50269,
|
28 |
+
" ": 50268,
|
29 |
+
" ": 50267,
|
30 |
+
" ": 50266,
|
31 |
+
" ": 50265,
|
32 |
+
" ": 50264,
|
33 |
+
" ": 50263,
|
34 |
+
" ": 50262,
|
35 |
+
" ": 50261,
|
36 |
+
" ": 50260,
|
37 |
+
" ": 50259,
|
38 |
+
" ": 50258,
|
39 |
+
" ": 50257
|
40 |
+
}
|
checkpoint-3000/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed36bf39922d122f8345ce65194c0892121e3c145640573345b0f97c3517421a
|
3 |
+
size 21025157
|
checkpoint-3000/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fa4d6f9ffd4894c26ef162571d110a2094cb9e0dcd311267dce3a52586b87f7
|
3 |
+
size 15607
|
checkpoint-3000/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab759ba0b99fde48e35d18628c4bb42a7d853fff16d56d019bc6319e7231be53
|
3 |
+
size 15607
|
checkpoint-3000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a892a349463691b0a4e838a0c42b267e539c035c5c8b226ba6192ede908a8641
|
3 |
+
size 627
|
checkpoint-3000/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<|endoftext|>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "<|endoftext|>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": "<|endoftext|>",
|
17 |
+
"unk_token": {
|
18 |
+
"content": "<|endoftext|>",
|
19 |
+
"lstrip": false,
|
20 |
+
"normalized": false,
|
21 |
+
"rstrip": false,
|
22 |
+
"single_word": false
|
23 |
+
}
|
24 |
+
}
|
checkpoint-3000/tokenizer_config.json
ADDED
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": false,
|
3 |
+
"add_prefix_space": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"50256": {
|
6 |
+
"content": "<|endoftext|>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"50257": {
|
14 |
+
"content": " ",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": true,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": false
|
20 |
+
},
|
21 |
+
"50258": {
|
22 |
+
"content": " ",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": true,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": false
|
28 |
+
},
|
29 |
+
"50259": {
|
30 |
+
"content": " ",
|
31 |
+
"lstrip": false,
|
32 |
+
"normalized": true,
|
33 |
+
"rstrip": false,
|
34 |
+
"single_word": false,
|
35 |
+
"special": false
|
36 |
+
},
|
37 |
+
"50260": {
|
38 |
+
"content": " ",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false,
|
43 |
+
"special": false
|
44 |
+
},
|
45 |
+
"50261": {
|
46 |
+
"content": " ",
|
47 |
+
"lstrip": false,
|
48 |
+
"normalized": true,
|
49 |
+
"rstrip": false,
|
50 |
+
"single_word": false,
|
51 |
+
"special": false
|
52 |
+
},
|
53 |
+
"50262": {
|
54 |
+
"content": " ",
|
55 |
+
"lstrip": false,
|
56 |
+
"normalized": true,
|
57 |
+
"rstrip": false,
|
58 |
+
"single_word": false,
|
59 |
+
"special": false
|
60 |
+
},
|
61 |
+
"50263": {
|
62 |
+
"content": " ",
|
63 |
+
"lstrip": false,
|
64 |
+
"normalized": true,
|
65 |
+
"rstrip": false,
|
66 |
+
"single_word": false,
|
67 |
+
"special": false
|
68 |
+
},
|
69 |
+
"50264": {
|
70 |
+
"content": " ",
|
71 |
+
"lstrip": false,
|
72 |
+
"normalized": true,
|
73 |
+
"rstrip": false,
|
74 |
+
"single_word": false,
|
75 |
+
"special": false
|
76 |
+
},
|
77 |
+
"50265": {
|
78 |
+
"content": " ",
|
79 |
+
"lstrip": false,
|
80 |
+
"normalized": true,
|
81 |
+
"rstrip": false,
|
82 |
+
"single_word": false,
|
83 |
+
"special": false
|
84 |
+
},
|
85 |
+
"50266": {
|
86 |
+
"content": " ",
|
87 |
+
"lstrip": false,
|
88 |
+
"normalized": true,
|
89 |
+
"rstrip": false,
|
90 |
+
"single_word": false,
|
91 |
+
"special": false
|
92 |
+
},
|
93 |
+
"50267": {
|
94 |
+
"content": " ",
|
95 |
+
"lstrip": false,
|
96 |
+
"normalized": true,
|
97 |
+
"rstrip": false,
|
98 |
+
"single_word": false,
|
99 |
+
"special": false
|
100 |
+
},
|
101 |
+
"50268": {
|
102 |
+
"content": " ",
|
103 |
+
"lstrip": false,
|
104 |
+
"normalized": true,
|
105 |
+
"rstrip": false,
|
106 |
+
"single_word": false,
|
107 |
+
"special": false
|
108 |
+
},
|
109 |
+
"50269": {
|
110 |
+
"content": " ",
|
111 |
+
"lstrip": false,
|
112 |
+
"normalized": true,
|
113 |
+
"rstrip": false,
|
114 |
+
"single_word": false,
|
115 |
+
"special": false
|
116 |
+
},
|
117 |
+
"50270": {
|
118 |
+
"content": " ",
|
119 |
+
"lstrip": false,
|
120 |
+
"normalized": true,
|
121 |
+
"rstrip": false,
|
122 |
+
"single_word": false,
|
123 |
+
"special": false
|
124 |
+
},
|
125 |
+
"50271": {
|
126 |
+
"content": " ",
|
127 |
+
"lstrip": false,
|
128 |
+
"normalized": true,
|
129 |
+
"rstrip": false,
|
130 |
+
"single_word": false,
|
131 |
+
"special": false
|
132 |
+
},
|
133 |
+
"50272": {
|
134 |
+
"content": " ",
|
135 |
+
"lstrip": false,
|
136 |
+
"normalized": true,
|
137 |
+
"rstrip": false,
|
138 |
+
"single_word": false,
|
139 |
+
"special": false
|
140 |
+
},
|
141 |
+
"50273": {
|
142 |
+
"content": " ",
|
143 |
+
"lstrip": false,
|
144 |
+
"normalized": true,
|
145 |
+
"rstrip": false,
|
146 |
+
"single_word": false,
|
147 |
+
"special": false
|
148 |
+
},
|
149 |
+
"50274": {
|
150 |
+
"content": " ",
|
151 |
+
"lstrip": false,
|
152 |
+
"normalized": true,
|
153 |
+
"rstrip": false,
|
154 |
+
"single_word": false,
|
155 |
+
"special": false
|
156 |
+
},
|
157 |
+
"50275": {
|
158 |
+
"content": " ",
|
159 |
+
"lstrip": false,
|
160 |
+
"normalized": true,
|
161 |
+
"rstrip": false,
|
162 |
+
"single_word": false,
|
163 |
+
"special": false
|
164 |
+
},
|
165 |
+
"50276": {
|
166 |
+
"content": " ",
|
167 |
+
"lstrip": false,
|
168 |
+
"normalized": true,
|
169 |
+
"rstrip": false,
|
170 |
+
"single_word": false,
|
171 |
+
"special": false
|
172 |
+
},
|
173 |
+
"50277": {
|
174 |
+
"content": " ",
|
175 |
+
"lstrip": false,
|
176 |
+
"normalized": true,
|
177 |
+
"rstrip": false,
|
178 |
+
"single_word": false,
|
179 |
+
"special": false
|
180 |
+
},
|
181 |
+
"50278": {
|
182 |
+
"content": " ",
|
183 |
+
"lstrip": false,
|
184 |
+
"normalized": true,
|
185 |
+
"rstrip": false,
|
186 |
+
"single_word": false,
|
187 |
+
"special": false
|
188 |
+
},
|
189 |
+
"50279": {
|
190 |
+
"content": " ",
|
191 |
+
"lstrip": false,
|
192 |
+
"normalized": true,
|
193 |
+
"rstrip": false,
|
194 |
+
"single_word": false,
|
195 |
+
"special": false
|
196 |
+
},
|
197 |
+
"50280": {
|
198 |
+
"content": " ",
|
199 |
+
"lstrip": false,
|
200 |
+
"normalized": true,
|
201 |
+
"rstrip": false,
|
202 |
+
"single_word": false,
|
203 |
+
"special": false
|
204 |
+
},
|
205 |
+
"50281": {
|
206 |
+
"content": " ",
|
207 |
+
"lstrip": false,
|
208 |
+
"normalized": true,
|
209 |
+
"rstrip": false,
|
210 |
+
"single_word": false,
|
211 |
+
"special": false
|
212 |
+
},
|
213 |
+
"50282": {
|
214 |
+
"content": " ",
|
215 |
+
"lstrip": false,
|
216 |
+
"normalized": true,
|
217 |
+
"rstrip": false,
|
218 |
+
"single_word": false,
|
219 |
+
"special": false
|
220 |
+
},
|
221 |
+
"50283": {
|
222 |
+
"content": " ",
|
223 |
+
"lstrip": false,
|
224 |
+
"normalized": true,
|
225 |
+
"rstrip": false,
|
226 |
+
"single_word": false,
|
227 |
+
"special": false
|
228 |
+
},
|
229 |
+
"50284": {
|
230 |
+
"content": " ",
|
231 |
+
"lstrip": false,
|
232 |
+
"normalized": true,
|
233 |
+
"rstrip": false,
|
234 |
+
"single_word": false,
|
235 |
+
"special": false
|
236 |
+
},
|
237 |
+
"50285": {
|
238 |
+
"content": " ",
|
239 |
+
"lstrip": false,
|
240 |
+
"normalized": true,
|
241 |
+
"rstrip": false,
|
242 |
+
"single_word": false,
|
243 |
+
"special": false
|
244 |
+
},
|
245 |
+
"50286": {
|
246 |
+
"content": " ",
|
247 |
+
"lstrip": false,
|
248 |
+
"normalized": true,
|
249 |
+
"rstrip": false,
|
250 |
+
"single_word": false,
|
251 |
+
"special": false
|
252 |
+
},
|
253 |
+
"50287": {
|
254 |
+
"content": "\t\t\t\t\t\t\t\t\t",
|
255 |
+
"lstrip": false,
|
256 |
+
"normalized": true,
|
257 |
+
"rstrip": false,
|
258 |
+
"single_word": false,
|
259 |
+
"special": false
|
260 |
+
},
|
261 |
+
"50288": {
|
262 |
+
"content": "\t\t\t\t\t\t\t\t",
|
263 |
+
"lstrip": false,
|
264 |
+
"normalized": true,
|
265 |
+
"rstrip": false,
|
266 |
+
"single_word": false,
|
267 |
+
"special": false
|
268 |
+
},
|
269 |
+
"50289": {
|
270 |
+
"content": "\t\t\t\t\t\t\t",
|
271 |
+
"lstrip": false,
|
272 |
+
"normalized": true,
|
273 |
+
"rstrip": false,
|
274 |
+
"single_word": false,
|
275 |
+
"special": false
|
276 |
+
},
|
277 |
+
"50290": {
|
278 |
+
"content": "\t\t\t\t\t\t",
|
279 |
+
"lstrip": false,
|
280 |
+
"normalized": true,
|
281 |
+
"rstrip": false,
|
282 |
+
"single_word": false,
|
283 |
+
"special": false
|
284 |
+
},
|
285 |
+
"50291": {
|
286 |
+
"content": "\t\t\t\t\t",
|
287 |
+
"lstrip": false,
|
288 |
+
"normalized": true,
|
289 |
+
"rstrip": false,
|
290 |
+
"single_word": false,
|
291 |
+
"special": false
|
292 |
+
},
|
293 |
+
"50292": {
|
294 |
+
"content": "\t\t\t\t",
|
295 |
+
"lstrip": false,
|
296 |
+
"normalized": true,
|
297 |
+
"rstrip": false,
|
298 |
+
"single_word": false,
|
299 |
+
"special": false
|
300 |
+
},
|
301 |
+
"50293": {
|
302 |
+
"content": "\t\t\t",
|
303 |
+
"lstrip": false,
|
304 |
+
"normalized": true,
|
305 |
+
"rstrip": false,
|
306 |
+
"single_word": false,
|
307 |
+
"special": false
|
308 |
+
},
|
309 |
+
"50294": {
|
310 |
+
"content": "\t\t",
|
311 |
+
"lstrip": false,
|
312 |
+
"normalized": true,
|
313 |
+
"rstrip": false,
|
314 |
+
"single_word": false,
|
315 |
+
"special": false
|
316 |
+
}
|
317 |
+
},
|
318 |
+
"bos_token": "<|endoftext|>",
|
319 |
+
"clean_up_tokenization_spaces": true,
|
320 |
+
"eos_token": "<|endoftext|>",
|
321 |
+
"errors": "replace",
|
322 |
+
"model_max_length": 2048,
|
323 |
+
"pad_token": "<|endoftext|>",
|
324 |
+
"padding_side": "right",
|
325 |
+
"split_special_tokens": false,
|
326 |
+
"tokenizer_class": "CodeGenTokenizer",
|
327 |
+
"unk_token": "<|endoftext|>"
|
328 |
+
}
|
checkpoint-3000/trainer_state.json
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.7317965605561654,
|
5 |
+
"eval_steps": 1000,
|
6 |
+
"global_step": 3000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.24,
|
13 |
+
"learning_rate": 4.30097988740375e-05,
|
14 |
+
"logits/chosen": 0.4760148227214813,
|
15 |
+
"logits/rejected": 0.18961936235427856,
|
16 |
+
"logps/chosen": -138.3477783203125,
|
17 |
+
"logps/rejected": -107.43299865722656,
|
18 |
+
"loss": 0.0534,
|
19 |
+
"rewards/accuracies": 0.9904999732971191,
|
20 |
+
"rewards/chosen": -1.208303689956665,
|
21 |
+
"rewards/margins": 6.7370734214782715,
|
22 |
+
"rewards/rejected": -7.945377349853516,
|
23 |
+
"step": 1000
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.24,
|
27 |
+
"eval_logits/chosen": 0.5734850168228149,
|
28 |
+
"eval_logits/rejected": 0.30642059445381165,
|
29 |
+
"eval_logps/chosen": -140.49411010742188,
|
30 |
+
"eval_logps/rejected": -130.59207153320312,
|
31 |
+
"eval_loss": 0.021729281172156334,
|
32 |
+
"eval_rewards/accuracies": 0.994514524936676,
|
33 |
+
"eval_rewards/chosen": -1.671390414237976,
|
34 |
+
"eval_rewards/margins": 8.56452751159668,
|
35 |
+
"eval_rewards/rejected": -10.235918045043945,
|
36 |
+
"eval_runtime": 379.8081,
|
37 |
+
"eval_samples_per_second": 9.597,
|
38 |
+
"eval_steps_per_second": 4.8,
|
39 |
+
"step": 1000
|
40 |
+
},
|
41 |
+
{
|
42 |
+
"epoch": 0.49,
|
43 |
+
"learning_rate": 2.594822843866259e-05,
|
44 |
+
"logits/chosen": 0.9377780556678772,
|
45 |
+
"logits/rejected": 0.5601473450660706,
|
46 |
+
"logps/chosen": -137.52035522460938,
|
47 |
+
"logps/rejected": -132.42645263671875,
|
48 |
+
"loss": 0.0182,
|
49 |
+
"rewards/accuracies": 0.9955000281333923,
|
50 |
+
"rewards/chosen": -1.4818373918533325,
|
51 |
+
"rewards/margins": 8.971912384033203,
|
52 |
+
"rewards/rejected": -10.453749656677246,
|
53 |
+
"step": 2000
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 0.49,
|
57 |
+
"eval_logits/chosen": 1.070937156677246,
|
58 |
+
"eval_logits/rejected": 0.6229708194732666,
|
59 |
+
"eval_logps/chosen": -139.24871826171875,
|
60 |
+
"eval_logps/rejected": -137.83485412597656,
|
61 |
+
"eval_loss": 0.01748650148510933,
|
62 |
+
"eval_rewards/accuracies": 0.995063066482544,
|
63 |
+
"eval_rewards/chosen": -1.5468522310256958,
|
64 |
+
"eval_rewards/margins": 9.413342475891113,
|
65 |
+
"eval_rewards/rejected": -10.96019458770752,
|
66 |
+
"eval_runtime": 382.2871,
|
67 |
+
"eval_samples_per_second": 9.535,
|
68 |
+
"eval_steps_per_second": 4.769,
|
69 |
+
"step": 2000
|
70 |
+
},
|
71 |
+
{
|
72 |
+
"epoch": 0.73,
|
73 |
+
"learning_rate": 8.35639340331897e-06,
|
74 |
+
"logits/chosen": 1.0557544231414795,
|
75 |
+
"logits/rejected": 0.6803003549575806,
|
76 |
+
"logps/chosen": -137.27392578125,
|
77 |
+
"logps/rejected": -142.61241149902344,
|
78 |
+
"loss": 0.0162,
|
79 |
+
"rewards/accuracies": 0.9959999918937683,
|
80 |
+
"rewards/chosen": -1.7070084810256958,
|
81 |
+
"rewards/margins": 9.696061134338379,
|
82 |
+
"rewards/rejected": -11.403070449829102,
|
83 |
+
"step": 3000
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 0.73,
|
87 |
+
"eval_logits/chosen": 1.0047662258148193,
|
88 |
+
"eval_logits/rejected": 0.6324675679206848,
|
89 |
+
"eval_logps/chosen": -139.297607421875,
|
90 |
+
"eval_logps/rejected": -142.67721557617188,
|
91 |
+
"eval_loss": 0.017071055248379707,
|
92 |
+
"eval_rewards/accuracies": 0.9961601495742798,
|
93 |
+
"eval_rewards/chosen": -1.5517414808273315,
|
94 |
+
"eval_rewards/margins": 9.89268970489502,
|
95 |
+
"eval_rewards/rejected": -11.444432258605957,
|
96 |
+
"eval_runtime": 382.4643,
|
97 |
+
"eval_samples_per_second": 9.53,
|
98 |
+
"eval_steps_per_second": 4.766,
|
99 |
+
"step": 3000
|
100 |
+
}
|
101 |
+
],
|
102 |
+
"logging_steps": 1000,
|
103 |
+
"max_steps": 4099,
|
104 |
+
"num_input_tokens_seen": 0,
|
105 |
+
"num_train_epochs": 1,
|
106 |
+
"save_steps": 1000,
|
107 |
+
"total_flos": 1.348843155142738e+17,
|
108 |
+
"train_batch_size": 1,
|
109 |
+
"trial_name": null,
|
110 |
+
"trial_params": null
|
111 |
+
}
|
checkpoint-3000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e1e080eaa2d6e4dd6be714c1ceaa538ec3b328a23e36d3aded3c04c0da7f56e
|
3 |
+
size 4539
|