We took a few AI LLMs for a test drive. This use case reads paragraphs of text from a text file and answers questions about it.
-Raising Awesome.
It took a lot of study on the Hugging Face site, but here is a cheat sheet for making a Q&A AI script:
1. Get Python going on your Windows or Linux box. At the time of writing, Python 3.10 had to be used for the AI packages to work.
2. Open a terminal and create a directory for development.
3. Get Hugging Face going:
python -m pip install huggingface_hub
huggingface-cli login
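The example scripts below also import the transformers and torch packages, so install those as well:
python -m pip install transformers torch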
4. Download a model by going to huggingface.co, visiting a model's page, and clicking the little copy button by its name. Then do this:
huggingface-cli download packagename --local-dir .
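For example, to grab the DistilBERT SQuAD model used in the first script below:
huggingface-cli download distilbert/distilbert-base-cased-distilled-squad --local-dir .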
5. Make a content.txt file. Keep all of the text on a single line (no line breaks), but it can hold several paragraphs' worth of content, such as a story or book passage.
6. Create your Python script and execute it:
import transformers
import torch
from transformers import pipeline
# See https://huggingface.co/tasks/question-answering
# Function to read the context passage from a file
def read_context_from_file(file_path):
    with open(file_path, 'r') as file:
        return file.read()

def get_response(question):
    result = qa_model(question=question, context=context)  # e.g. {'answer': 'İstanbul', 'end': 39, 'score': 0.953, 'start': 31}
    return result['answer']

bert_pipeline = transformers.pipeline(
    "question-answering",
    model='distilbert-base-cased-distilled-squad'  # this will download the model to your user .cache directory
    # preferably, the local model path can be used instead:
    # model=r'C:\Users\Sean-\.cache\huggingface\hub\models--distilbert--distilbert-base-cased-distilled-squad\snapshots\564e9b582944a57a3e586bbb98fd6f0a4118db7f'
)
print("setting model")
qa_model = bert_pipeline
context = read_context_from_file('content.txt')  # a one-line text file with the paragraphs of content you want to ask questions about
if __name__ == "__main__":
    print("\n\nCLOVER is online! Type 'exit' to quit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!\n\n")
            break
        response = get_response(user_input)
        print(f"\n\nCLOVER: {response}")
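The question-answering pipeline also returns a confidence value (the 'score' field shown in the comment above), so the bot can admit when it is unsure. Here is a minimal sketch of a gated version of get_response; the 0.2 threshold is just an illustrative value, not something from the original script:

def get_response(question, min_score=0.2):  # min_score is an illustrative threshold, tune it to taste
    result = qa_model(question=question, context=context)
    if result['score'] < min_score:
        return "I'm not sure; the passage doesn't seem to cover that."
    return result['answer']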
Another example:
This script takes a file named documents.json, fine-tunes a BERT model on its content, and then uses BERT to pull out the relevant text and T5 to phrase the answer. The documents.json file (a list of objects, each with a "content" field) is shown after the script.
import json
import torch
import torch.nn.functional as F
from transformers import AutoModelForMaskedLM, AutoModelForQuestionAnswering, AutoModelForSeq2SeqLM, AutoTokenizer, Trainer, TrainingArguments, EarlyStoppingCallback, DataCollatorForLanguageModeling
from datasets import Dataset
import pandas as pd
from sklearn.model_selection import train_test_split
import os
# Set number of threads for parallel processing
num_cores = 12
torch.set_num_threads(num_cores)
os.environ["OMP_NUM_THREADS"] = str(num_cores)
os.environ["MKL_NUM_THREADS"] = str(num_cores)
# Load JSON data
with open('documents.json', 'r') as file:
    data = json.load(file)
# Convert JSON data to a DataFrame
df = pd.DataFrame(data)
# Convert DataFrame to Dataset
dataset = Dataset.from_pandas(df)
# Split the dataset into training and evaluation sets
train_df, eval_df = train_test_split(dataset.to_pandas(), test_size=0.2)
# Convert back to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_df)
eval_dataset = Dataset.from_pandas(eval_df)
# Load pre-trained model and tokenizer for MaskedLM
masked_lm_model_name = "deepset/bert-large-uncased-whole-word-masking-squad2"
masked_lm_model = AutoModelForMaskedLM.from_pretrained(masked_lm_model_name)
masked_lm_tokenizer = AutoTokenizer.from_pretrained(masked_lm_model_name)
# Preprocess data: tokenize each document's content
# (the data collator defined with the trainer below handles masking tokens and building the labels)
def preprocess_function(examples):
    return masked_lm_tokenizer(examples['content'], padding='max_length', truncation=True, max_length=512)
# Apply the preprocessing function to your datasets
train_dataset = train_dataset.map(preprocess_function, batched=True)
eval_dataset = eval_dataset.map(preprocess_function, batched=True)
# Training arguments
training_args = TrainingArguments(
    output_dir='./FineTunedDocs',   # Set output directory to FineTunedDocs
    num_train_epochs=10,            # Training epochs
    per_device_train_batch_size=8,
    evaluation_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    metric_for_best_model='eval_loss',
    greater_is_better=False,        # Indicates that lower is better for the eval_loss metric
    logging_dir='./logs',           # Directory for storing logs
    logging_steps=10,
)
# Define the trainer for MaskedLM
# The data collator randomly masks tokens and builds the labels for the masked-language-model objective
data_collator = DataCollatorForLanguageModeling(tokenizer=masked_lm_tokenizer, mlm=True, mlm_probability=0.15)
trainer = Trainer(
    model=masked_lm_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)]  # Patience can be adjusted as needed
)
# Fine-tune the MaskedLM model
trainer.train()
# Save the fine-tuned MaskedLM model
masked_lm_model.save_pretrained('FineTunedDocs')
masked_lm_tokenizer.save_pretrained('FineTunedDocs')
# Load fine-tuned models and tokenizers
bert_model_name = "FineTunedDocs"
bert_model = AutoModelForQuestionAnswering.from_pretrained(bert_model_name)
bert_tokenizer = AutoTokenizer.from_pretrained(bert_model_name)
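# Note: FineTunedDocs was saved from the masked-LM model above, so transformers will warn that the
# question-answering head is newly initialized when the checkpoint is loaded here.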
t5_model_name = "t5-base"
t5_model = AutoModelForSeq2SeqLM.from_pretrained(t5_model_name)
t5_tokenizer = AutoTokenizer.from_pretrained(t5_model_name)
# Ensure both models are in evaluation mode
bert_model.eval()
t5_model.eval()
# Load JSON data for QA
with open('documents.json', 'r') as file:
    documents = json.load(file)
# Combine the content of all documents into a single context
context = " ".join([doc['content'] for doc in documents])
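# Note: extract_relevant_text below only looks at the first 512 characters of this combined context (context[:512]).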
def extract_relevant_text(question, context):
    # Tokenize input and ensure the context length is within the model's limit
    inputs = bert_tokenizer(question, context[:512], return_tensors='pt', padding='max_length', truncation=True)
    # Get model output
    with torch.no_grad():
        outputs = bert_model(**inputs)
    # Get the answer span (start and end positions)
    start_logits = outputs.start_logits
    end_logits = outputs.end_logits
    start_probs = F.softmax(start_logits, dim=-1)
    end_probs = F.softmax(end_logits, dim=-1)
    answer_start = torch.argmax(start_logits)
    answer_end = torch.argmax(end_logits)
    answer_prob = (start_probs[0, answer_start] * end_probs[0, answer_end]).item()
    # Decode the answer span (the end index is inclusive, so slice one past it)
    extracted_text = bert_tokenizer.convert_tokens_to_string(bert_tokenizer.convert_ids_to_tokens(inputs['input_ids'][0][answer_start:answer_end + 1]))
    # Handle cases where extracted text is empty or invalid
    if not extracted_text.strip():
        extracted_text = "Sorry, I couldn't find a relevant answer."
    return extracted_text, answer_prob
def generate_human_like_response(question, extracted_text):
    # Concatenate question and extracted text
    input_text = f"question: {question} extracted_text: {extracted_text}"
    # Tokenize input
    inputs = t5_tokenizer(input_text, return_tensors='pt', padding='max_length', truncation=True, max_length=512)
    # Generate response
    with torch.no_grad():
        outputs = t5_model.generate(inputs['input_ids'], max_length=150)
    # Decode the response
    response = t5_tokenizer.decode(outputs[0], skip_special_tokens=True)
    # Handle cases where the generated response is not coherent
    if not response.strip():
        response = "Sorry, I couldn't generate a coherent response."
    return response
def main():
    print("Welcome to the QA responder. Type 'exit' to quit.")
    while True:
        user_input = input("Enter your question: ")
        if user_input.lower() == "exit":
            print("Goodbye!")
            break
        # Extract relevant text with BERT
        extracted_text, probability = extract_relevant_text(user_input, context)
        print(extracted_text)
        # Generate a human-like response with T5
        if probability < 0.000005:  # You can set a threshold for confidence
            response = "I don't know."
        else:
            response = generate_human_like_response(user_input, extracted_text)
        print(f"Answer: {response}")
        print(f"Confidence: {probability:.6f}")

if __name__ == "__main__":
    main()
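Here is the documents.json file the script reads; each entry is a short paragraph of content: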
[
{
"content": "When Connor was 5, he thought it would be cool for us to make an R2D2."
},
{
"content": "Little did we know it would change the trajectory of both our lives for the next 12 years."
},
{
"content": "We went on to a journey to learn all Maker skills."
},
{
"content": "Although, as his dad, I brought to the table years of programming and practical experience working on my cars and homes, I learned along with him on electronics and metal working."
},
{
"content": "Our journey continued through his teen years. He has now launched to college majoring in Mechanical Engineering."
},
{
"content": "Along the way, we learned 3D printing, how to solder, how to weld, how to design with Autodesk Fusion 360, and on and on."
},
{
"content": "The projects covered home maintenance and construction, mechatronics, IoT, and automative repair."
},
{
"content": "We also matched our projects to life principles to help illuminate the best person he could become."
},
{
"content": "We consider our time on these projects very well spent. It gave him insights and a life plan for the world ahead."
},
{
"content": "The result is a great repository for new parents to take on such a journey with their children."
},
{
"content": "Or, at the least, it's a great site to find great Maker projects and learn to become a jack of all trades (which is way better than just a master of one)."
}
]
This code uses the Llama 3.1 8B Instruct model. You download it locally first.
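Per step 4 above, pull it down to a local directory first. It's a gated model, so accept Meta's license on its Hugging Face page and log in with huggingface-cli login before downloading. Also, device_map="auto" expects the accelerate package to be installed (python -m pip install accelerate):
huggingface-cli download meta-llama/Llama-3.1-8B-Instruct --local-dir D:\AI\llama8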
import transformers
import torch
import os
import time
# model_id = "meta-llama/Llama-3.1-8B-Instruct"
model_id = r"D:\AI\llama8"
# Set the number of threads to the number of available CPU cores of your computer
# took 29 minutes on the HTPC
num_cores = 12 # IMPORTANT!
torch.set_num_threads(num_cores)
os.environ["OMP_NUM_THREADS"] = str(num_cores)
os.environ["MKL_NUM_THREADS"] = str(num_cores)
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)
def get_response(question):
    start_time = time.time()
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": question}
    ]
    outputs = pipeline(
        messages,
        max_new_tokens=256,
    )
    end_time = time.time()
    # Calculate the total time taken
    total_time = end_time - start_time
    minutes, seconds = divmod(total_time, 60)
    print(f"Total time taken: {int(minutes)} minutes and {seconds:.2f} seconds")
    # The chat pipeline returns the full message list; the last entry is the assistant's reply
    return outputs[0]["generated_text"][-1]["content"]
if __name__ == "__main__":
    print("\n\nCLOVER is online! Type 'exit' to quit.")
    while True:
        user_input = input("You: ")
        if user_input.lower() == "exit":
            print("Goodbye!\n\n")
            break
        response = get_response(user_input)
        print(f"\n\nCLOVER: {response}")
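The script above starts a fresh conversation on every question. If you want CLOVER to remember earlier turns, one option (a rough sketch, not part of the original script) is to keep the messages list alive across the loop and append each exchange to it:

# Rough sketch of a multi-turn variant; assumes the same `pipeline` object from above
history = [{"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"}]

def get_response_with_history(question):
    history.append({"role": "user", "content": question})
    outputs = pipeline(list(history), max_new_tokens=256)  # pass a copy of the running conversation
    reply = outputs[0]["generated_text"][-1]["content"]
    history.append({"role": "assistant", "content": reply})
    return reply

Swap get_response for get_response_with_history in the main loop and the model will see the whole conversation so far each time it answers.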