Sentiment analysis Python code

 import pandas as pd

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
from torch.nn.functional import softmax
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
from nltk.tokenize import sent_tokenize

# One-time NLTK resources: the VADER lexicon backs SentimentIntensityAnalyzer,
# and the Punkt sentence models back sent_tokenize. Without the Punkt data,
# sent_tokenize raises LookupError on a fresh environment.
nltk.download('vader_lexicon')
nltk.download('punkt')
# Newer NLTK releases look the sentence tokenizer up under 'punkt_tab';
# requesting both keeps the script working across NLTK versions.
nltk.download('punkt_tab')

# FinBERT classifier and its tokenizer are loaded once at import time and
# shared by every analyze_sentiment call.
model = AutoModelForSequenceClassification.from_pretrained('ProsusAI/finBERT')
tokenizer = AutoTokenizer.from_pretrained('ProsusAI/finBERT')
sia = SentimentIntensityAnalyzer()

# keywords.xlsx must provide the 'Key Words/Topics' and 'Key Word Category'
# columns; every column of ads.xlsx is scanned for those keywords.
keywords_df = pd.read_excel('keywords.xlsx')
ads_df = pd.read_excel('ads.xlsx')

def split_text(text, chunk_size):
    """Cut ``text`` into consecutive slices of at most ``chunk_size`` items.

    Slicing is purely positional (no attention to word or sentence
    boundaries); the final piece holds whatever remains and may be shorter.
    An empty ``text`` yields an empty list.
    """
    pieces = []
    for start in range(0, len(text), chunk_size):
        stop = start + chunk_size
        pieces.append(text[start:stop])
    return pieces

def analyze_sentiment(text):
    """Return the classifier's class probabilities for ``text``.

    The text is tokenized with the module-level ``tokenizer`` (truncation
    enabled, so input beyond the tokenizer's length limit is cut off), fed
    through the module-level ``model``, and the logits are converted to
    probabilities with a softmax over the class dimension.

    Args:
        text: The text to score.

    Returns:
        A 1-D tensor of probabilities for the first sequence in the batch,
        one entry per model class.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Inference only: disabling autograd avoids building a gradient graph on
    # every call, which would otherwise waste memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
        probabilities = softmax(outputs.logits, dim=1)
    return probabilities[0]

# For each column of ads_df: find every cell that mentions a keyword, split
# the cell text into 1024-character chunks, score each chunk, and write one
# workbook per column named output_<column>.xlsx.
output_columns = ['Key Word Category', 'Keyword', 'Paragraph', 'Sentiment Score', 'Magnitude']

for column in ads_df.columns:
    # Convert the column to text once per column rather than once per keyword.
    # Dropping NaN first stops empty cells from turning into the string "nan"
    # and falsely matching keywords such as "an" or "na".
    texts = ads_df[column].dropna().astype(str)
    lowered_texts = texts.str.lower()

    # Rows are gathered in a plain list and turned into a DataFrame once;
    # concatenating inside the innermost loop copies the frame every time.
    rows = []

    for _, row in keywords_df.iterrows():
        keyword = row['Key Words/Topics']
        category = row['Key Word Category']

        # Case-insensitive literal substring match (regex=False keeps keyword
        # characters like "+" or "(" from being read as a pattern).
        paragraphs = texts[lowered_texts.str.contains(keyword.lower(), regex=False)]

        for paragraph in paragraphs:
            for chunk in split_text(paragraph, 1024):
                probabilities = analyze_sentiment(chunk)
                # Weighted sum with weights 3/1/2 on classes 0/1/2, shifted by 2.
                # Because the probabilities sum to 1 this equals
                # probabilities[0] - probabilities[1], i.e. a score in [-1, 1].
                # NOTE(review): presumably class 0 is positive and class 1 is
                # negative for this model -- confirm against the model config.
                sentiment_score = (probabilities[1] + (probabilities[2] * 2) + (probabilities[0] * 3)) - 2

                # Magnitude: sum of the absolute VADER compound score of each
                # sentence in the chunk. (The per-sentence FinBERT pass that
                # used to run here was never used, so it has been removed.)
                total_magnitude = sum(
                    abs(sia.polarity_scores(sentence)['compound'])
                    for sentence in sent_tokenize(chunk)
                )

                rows.append({
                    'Key Word Category': category,
                    'Keyword': keyword,
                    'Paragraph': chunk,
                    'Sentiment Score': sentiment_score.item(),
                    'Magnitude': total_magnitude,
                })

    # Passing columns explicitly keeps the header row even when nothing matched.
    counts_df = pd.DataFrame(rows, columns=output_columns)
    counts_df.to_excel(f'output_{column}.xlsx', index=False)

No comments:

Post a Comment

Notes 3-18-25

https://uconn-sa.blogspot.com/  We were able to launch an app engine program from our compute engine instance.   I'd like to get all wo...