ARB Security Solutions, LLC.

Sentiment Analysis With PyTorch ML On Feedback Data in SharePoint Online

This example connects to SharePoint Online and extracts customer feedback data from a SharePoint Online list named CustomerFeedback. The feedback text and the corresponding sentiment labels are loaded into a custom PyTorch dataset, which is used to fine-tune a DistilBERT-based sentiment analysis model. The fine-tuned model is then used to predict sentiment labels for a test set of three sample feedback statements.

# Import required libraries
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import torch
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Set up SharePoint Online connection
# The site URL and credentials below are placeholders; replace them with real
# values before running the script.
url = 'https://yourcompany.sharepoint.com/sites/sitename'
# NOTE(review): this value looks like an e-mail placeholder that was mangled by
# web-page e-mail obfuscation; presumably a user sign-in name is expected here
# -- confirm and replace.
username = '[email protected]'
# NOTE(review): hard-coded password; consider loading credentials from a
# secure source instead of keeping them in the script.
password = 'yourpassword'
# Authenticate with the user credentials, then build the client context that
# the list query further down is issued through.
auth_context = AuthenticationContext(url)
# NOTE(review): the result of acquire_token_for_user is not checked -- confirm
# how the installed client library version reports a failed sign-in.
auth_context.acquire_token_for_user(username, password)
ctx = ClientContext(url, auth_context)

# Get data from SharePoint Online list
list_name = 'CustomerFeedback'
# Look the list up by its title and take a handle on its item collection.
feedback_list = ctx.web.lists.get_by_title(list_name)
feedback_items = feedback_list.items
# Register the collection with the context and run the query; presumably
# feedback_items is populated once execute_query() returns -- TODO confirm
# against the client library documentation.
ctx.load(feedback_items)
ctx.execute_query()

# Extract feedback and sentiment label from the list
# One pass over the items gathers (text, label) pairs; the pairs are then
# split into the two parallel lists used by the rest of the script.
# NOTE(review): assumes every item exposes Feedback and Sentiment as
# attributes -- confirm against the client library version in use.
_rows = [(entry.Feedback, entry.Sentiment) for entry in feedback_items]
feedback = [text for text, _ in _rows]
sentiment_label = [label for _, label in _rows]

# Define a custom dataset to load data into PyTorch
class FeedbackDataset(Dataset):
    """Map-style dataset pairing feedback texts with sentiment labels.

    Each sample is tokenized on access and padded/truncated to one fixed
    length. Padding a single text with ``padding=True`` adds no padding at
    all (the "longest" sequence is the text itself), which yields tensors of
    different lengths that the default ``DataLoader`` collate function cannot
    stack into a batch; padding to ``max_length`` avoids that.

    Args:
        feedback: Indexable, sized collection of feedback strings.
        sentiment_label: Indexable collection of integer class labels,
            parallel to ``feedback``.
        tokenizer: Callable following the Hugging Face tokenizer call
            convention (accepts ``truncation``, ``padding``, ``max_length``
            and ``return_tensors``; returns a mapping with ``input_ids`` and
            ``attention_mask`` of shape ``(1, length)``).
        max_length: Fixed sequence length for every sample. ``None`` (the
            default) defers to the tokenizer's own maximum length, which
            keeps the truncation limit unchanged; pass a smaller value to
            reduce memory use and training time.
    """

    def __init__(self, feedback, sentiment_label, tokenizer, max_length=None):
        self.feedback = feedback
        self.sentiment_label = sentiment_label
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of feedback samples."""
        return len(self.feedback)

    def __getitem__(self, index):
        """Return the tokenized sample at ``index``.

        Returns:
            A dict with 1-D ``input_ids`` and ``attention_mask`` tensors and
            a scalar ``labels`` tensor of dtype ``torch.long``.
        """
        text = self.feedback[index]
        label = self.sentiment_label[index]
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        return {
            # Drop the leading batch dimension of size 1 added by the tokenizer.
            'input_ids': encoding['input_ids'][0],
            'attention_mask': encoding['attention_mask'][0],
            # Class indices must be integer tensors; forcing the dtype also
            # accepts labels that arrive as floats such as 1.0.
            'labels': torch.tensor(label, dtype=torch.long),
        }

# Define your model architecture
# The training and prediction code in this script moves its tensors to
# `device`, so the device has to be chosen here and the model placed on it.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# NOTE(review): no num_labels is passed, so the library's default number of
# output classes is used -- confirm it matches the Sentiment label set.
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased')
model.to(device)

# Create an instance of your dataset and dataloader
# The dataset wraps the feedback texts and labels extracted above together
# with the tokenizer.
dataset = FeedbackDataset(feedback, sentiment_label, tokenizer)
# Batches of 4 samples, drawn in a reshuffled order on every pass.
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# Define your loss function and optimizer
# NOTE(review): criterion is not referenced by the training loop in this
# script, which reads the loss from the model output (outputs.loss) instead.
criterion = torch.nn.CrossEntropyLoss()
# Adam over all model parameters with a learning rate of 2e-5.
optimizer = optim.Adam(model.parameters(), lr=2e-5)

# Train your model
# from_pretrained() hands the model back in evaluation mode, so dropout has
# to be switched on explicitly before fine-tuning.
model.train()
num_batches = len(dataloader)
for epoch in range(3):
    running_loss = 0.0
    for batch in dataloader:
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        # Passing labels makes the model compute the classification loss itself.
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Each loss value is already a mean over its batch, so the epoch average
    # is taken over the number of batches (guarded against an empty loader).
    print('Epoch %d loss: %.3f' % (epoch + 1, running_loss / max(num_batches, 1)))
# Leave the model in evaluation mode so later inference runs without dropout.
model.eval()

# Use your trained model for prediction
test_data = ['I love this product', 'I hate this product', 'This product is okay']
# The three texts are tokenized together, so padding=True pads them to the
# longest of the three.
test_encoding = tokenizer(test_data, truncation=True, padding=True, return_tensors='pt')
test_input_ids = test_encoding['input_ids'].to(device)
test_attention_mask = test_encoding['attention_mask'].to(device)
# Inference only: disable dropout and skip gradient tracking.
model.eval()
with torch.no_grad():
    test_output = model(test_input_ids, attention_mask=test_attention_mask)
# Highest-scoring class index for each input text.
test_predictions = torch.argmax(test_output.logits, dim=1).tolist()

# Print the predictions
print(test_predictions)

Comments are closed.