"""Tune, train, evaluate, and publish an MNIST classifier.

The script defines a hyperparameter grid for GridSearchCV, which performs
the hyperparameter tuning. The train_model() function is used to train the
model with the best hyperparameters obtained from GridSearchCV. The trained
model is then evaluated with the evaluate_model() function, and the best
model is saved to a file with the save_model() function. The evaluation
metrics and hyperparameters are also saved to separate files. Finally, the
upload_to_sharepoint() function uploads the model file, evaluation metrics,
and hyperparameters to the specified target folder in the SharePoint Online
document library.
"""
import os

import requests
import torch
from shareplum import Office365
from shareplum import Site
from shareplum.site import Version
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import GridSearchCV
from torchvision import datasets, transforms
# SharePoint Online credentials and site information.
# Each value is read from an environment variable when one is set, so real
# secrets never need to be committed to the source file; the original
# placeholder values are kept as fallbacks.
username = os.environ.get('SHAREPOINT_USERNAME', 'your_username')
password = os.environ.get('SHAREPOINT_PASSWORD', 'your_password')
site_url = os.environ.get('SHAREPOINT_SITE_URL', 'https://your_sharepoint_site_url')
document_library = os.environ.get('SHAREPOINT_DOCUMENT_LIBRARY', 'Documents')
# Function to load and preprocess the data
def load_data(batch_size=64, data_dir='data'):
    """Build the MNIST training and test data loaders.

    Images are converted to tensors and normalized with mean 0.5 and
    standard deviation 0.5. The dataset is downloaded into ``data_dir``
    when it is not already present there.

    Args:
        batch_size: Number of samples per batch for both loaders.
        data_dir: Directory in which the MNIST files are stored.

    Returns:
        A ``(trainloader, testloader)`` tuple. The training loader shuffles
        its samples; the test loader keeps dataset order.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    trainset = datasets.MNIST(data_dir, train=True, download=True, transform=transform)
    testset = datasets.MNIST(data_dir, train=False, download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)
    return trainloader, testloader
# Function to define the model architecture
def create_model(hidden_units, input_size=784, num_classes=10):
    """Create a one-hidden-layer classifier that outputs log-probabilities.

    Args:
        hidden_units: Width of the hidden layer.
        input_size: Number of input features per sample. The default of 784
            is the value this function previously hard-coded.
        num_classes: Number of output classes. The default of 10 is the
            value this function previously hard-coded.

    Returns:
        A ``torch.nn.Sequential`` of Linear -> ReLU -> Linear -> LogSoftmax.
        The final LogSoftmax is taken over dim 1, so inputs are expected as
        2-D ``(batch, input_size)`` tensors.
    """
    model = torch.nn.Sequential(
        torch.nn.Linear(input_size, hidden_units),
        torch.nn.ReLU(),
        torch.nn.Linear(hidden_units, num_classes),
        torch.nn.LogSoftmax(dim=1),
    )
    return model
# Function to train the model
def train_model(model, trainloader, lr=0.01, epochs=5):
    """Train ``model`` in place with SGD and negative log-likelihood loss.

    Each batch of images is flattened to ``(batch, -1)`` before the forward
    pass. The average loss of every epoch is printed.

    Args:
        model: Module producing log-probabilities (NLLLoss is applied to
            its output).
        trainloader: Iterable yielding ``(images, labels)`` batches.
        lr: SGD learning rate. Defaults to the previously hard-coded 0.01.
        epochs: Number of passes over ``trainloader``. Defaults to the
            previously hard-coded 5.

    Returns:
        A list with the average training loss of each epoch.

    Raises:
        ValueError: If ``trainloader`` yields no batches.
    """
    criterion = torch.nn.NLLLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    # Make sure layers such as dropout/batch-norm are in training mode.
    model.train()
    epoch_losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        # Count batches while iterating instead of calling len(), so loaders
        # without a length are supported and an empty loader is detected.
        num_batches = 0
        for images, labels in trainloader:
            images = images.view(images.shape[0], -1)
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        if num_batches == 0:
            raise ValueError("trainloader yielded no batches; cannot train the model")
        average_loss = running_loss / num_batches
        epoch_losses.append(average_loss)
        print(f"Epoch {epoch+1} - Training Loss: {average_loss}")
    return epoch_losses
# Function to evaluate the model
def evaluate_model(model, testloader):
    """Compute the classification accuracy of ``model`` on ``testloader``.

    Each batch of images is flattened to ``(batch, -1)`` and the predicted
    class is the index of the largest output along dim 1. Gradients are
    disabled and the model is put in evaluation mode for the duration of
    the call; its previous training/eval mode is restored afterwards.

    Args:
        model: Module mapping flattened images to per-class scores.
        testloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Fraction of correctly classified samples as a float in [0, 1], or
        0.0 when ``testloader`` yields no samples.
    """
    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in testloader:
                images = images.view(images.shape[0], -1)
                output = model(images)
                predicted = output.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    if total == 0:
        # Avoid dividing by zero on an empty loader.
        return 0.0
    accuracy = correct / total
    return accuracy
# Save the trained model
def save_model(model, filename):
    """Serialize the model's ``state_dict`` to ``filename`` with torch.save.

    Args:
        model: Module whose parameters and buffers are saved.
        filename: Destination path (or any target torch.save accepts). When
            it is a path, missing parent directories are created first so
            saving into a new folder does not fail.
    """
    if isinstance(filename, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filename))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
    torch.save(model.state_dict(), filename)
# Upload the model file, evaluation metrics, and hyperparameters to SharePoint Online
def upload_to_sharepoint(model_filename, metrics_filename, hyperparameters, target_folder,
                         hyperparameters_filename='hyperparameters.txt'):
    """Write the metrics/hyperparameter files and upload all artifacts.

    The metrics and hyperparameter text files are written locally first,
    then the model file and both text files are uploaded to
    ``{document_library}/{target_folder}`` on the configured SharePoint
    site.

    Args:
        model_filename: Path of the saved model file to upload.
        metrics_filename: Path of the metrics text file to write and upload.
        hyperparameters: Mapping of names to values. The ``'accuracy'`` and
            ``'loss'`` entries, when present, are written to the metrics
            file; every entry is written to the hyperparameters file.
        target_folder: Folder inside the document library to upload into.
        hyperparameters_filename: Path of the hyperparameters text file to
            write and upload. Previously this was read from a module-level
            global that only existed when the file ran as a script.
    """
    # Metrics are optional: a plain best_params_ dict carries neither key,
    # and indexing it directly would raise KeyError.
    with open(metrics_filename, 'w') as metrics_file:
        for key, label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
            if key in hyperparameters:
                metrics_file.write(f"{label}: {hyperparameters[key]}\n")
    with open(hyperparameters_filename, 'w') as hyperparameters_file:
        for key, value in hyperparameters.items():
            hyperparameters_file.write(f"{key}: {value}\n")

    # Office365 is not a context manager; SharePlum authenticates by
    # fetching a cookie jar that is then handed to Site.
    # NOTE(review): SharePlum's examples authenticate against the tenant
    # root URL rather than the site URL - confirm site_url works for both.
    authcookie = Office365(site_url, username=username, password=password).GetCookies()
    site = Site(site_url, version=Version.v365, authcookie=authcookie)
    folder = site.Folder(f'{document_library}/{target_folder}')

    # upload_file expects the file content, so each file is read as bytes.
    for local_path in (model_filename, metrics_filename, hyperparameters_filename):
        with open(local_path, 'rb') as local_file:
            folder.upload_file(local_file.read(), os.path.basename(local_path))
    print("Model, metrics, and hyperparameters uploaded to SharePoint Online.")
# Main script
class _TorchMLPClassifier(ClassifierMixin, BaseEstimator):
    """scikit-learn estimator wrapper around the network from create_model().

    GridSearchCV needs an object with get_params/set_params, fit and
    predict; a bare torch module offers none of these, and the learning
    rate is not a property of the module at all. This wrapper exposes
    ``hidden_units`` and ``lr`` as tunable constructor parameters.
    """

    def __init__(self, hidden_units=64, lr=0.01, epochs=5, batch_size=64):
        # scikit-learn requires __init__ to store its arguments unchanged.
        self.hidden_units = hidden_units
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, X, y):
        """Train a fresh network on 2-D features ``X`` and integer labels ``y``."""
        features = torch.as_tensor(X, dtype=torch.float32)
        targets = torch.as_tensor(y, dtype=torch.long)
        self.classes_ = torch.unique(targets).numpy()
        self.model_ = create_model(self.hidden_units)
        loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(features, targets),
            batch_size=self.batch_size,
            shuffle=True,
        )
        criterion = torch.nn.NLLLoss()
        optimizer = torch.optim.SGD(self.model_.parameters(), lr=self.lr)
        self.model_.train()
        for _ in range(self.epochs):
            for batch_features, batch_targets in loader:
                optimizer.zero_grad()
                loss = criterion(self.model_(batch_features), batch_targets)
                loss.backward()
                optimizer.step()
        return self

    def predict(self, X):
        """Return the predicted class index for every row of ``X``."""
        self.model_.eval()
        with torch.no_grad():
            log_probs = self.model_(torch.as_tensor(X, dtype=torch.float32))
        return log_probs.argmax(dim=1).numpy()


if __name__ == '__main__':
    # Load and preprocess the data
    trainloader, testloader = load_data()

    # Define the hyperparameter grid for GridSearchCV
    hyperparameters_grid = {
        'hidden_units': [64, 128, 256],
        'lr': [0.01, 0.001, 0.0001]
    }

    # GridSearchCV works on arrays, not loaders. The raw uint8 pixels are
    # scaled to [0, 1] and normalized with mean/std 0.5 so the search sees
    # the same value range that load_data()'s transform feeds the loaders.
    search_features = trainloader.dataset.data.view(-1, 784).float().div(255.0)
    search_features = ((search_features - 0.5) / 0.5).numpy()
    search_targets = trainloader.dataset.targets.numpy()

    # Perform hyperparameter tuning using GridSearchCV
    grid_search = GridSearchCV(_TorchMLPClassifier(), hyperparameters_grid, scoring='accuracy', cv=3)
    grid_search.fit(search_features, search_targets)

    # Get the best model and hyperparameters. best_estimator_ is the wrapper
    # refit on the full training set; the torch module lives in .model_.
    best_model = grid_search.best_estimator_.model_
    best_hyperparameters = grid_search.best_params_

    # Train the best model
    train_model(best_model, trainloader)

    # Evaluate the best model
    accuracy = evaluate_model(best_model, testloader)

    # Average negative log-likelihood on the test set, reported as the loss.
    criterion = torch.nn.NLLLoss(reduction='sum')
    summed_loss = 0.0
    best_model.eval()
    with torch.no_grad():
        for images, labels in testloader:
            output = best_model(images.view(images.shape[0], -1))
            summed_loss += criterion(output, labels).item()
    loss = summed_loss / len(testloader.dataset)

    # Save the best model
    model_filename = 'best_model.pt'
    save_model(best_model, model_filename)

    # Save evaluation metrics and hyperparameters
    metrics_filename = 'evaluation_metrics.txt'
    hyperparameters_filename = 'hyperparameters.txt'

    # Upload the model file, evaluation metrics, and hyperparameters to SharePoint Online.
    # The upload step reads 'accuracy' and 'loss' from the mapping it is
    # given, so the metrics are passed alongside the tuned hyperparameters.
    target_folder = 'ML_Models'
    results = {**best_hyperparameters, 'accuracy': accuracy, 'loss': loss}
    upload_to_sharepoint(model_filename, metrics_filename, results, target_folder)