• Python使用大连理工情感本体提取文本的情感倾向


    import pandas as pd
    # Load the lexicon workbook.
    # Forward slashes keep the path portable and avoid the invalid "\大" / "\情"
    # escape sequences that the backslash-separated literal contained.
    # keep_default_na=False with na_values=[''] stops pandas from parsing the
    # literal category code 'NA' as a missing value (it is a real label in the
    # '情感分类' column), while blank cells are still read as NaN.
    df = pd.read_excel(
        'Sentiment_dictionary/大连理工情感词汇本体/情感词汇本体.xlsx',
        keep_default_na=False,
        na_values=[''],
    )
    # For now only these columns are kept:
    # ['词语', '词性种类', '词义数', '词义序号', '情感分类', '强度', '极性']
    df = df[['词语', '词性种类', '词义数', '词义序号', '情感分类', '强度', '极性']]
    df.head()
    
    # Split the lexicon words into seven emotion families by category code.
    # Boolean masks over the '情感分类' column replace the row-by-row
    # iterrows() loop: one pass per family, and every list keeps the row order
    # of df, exactly as the loop produced.
    _labels = df['情感分类']
    _words = df['词语']
    Happy = _words[_labels.isin(['PA', 'PE'])].tolist()
    Good = _words[_labels.isin(['PD', 'PH', 'PG', 'PB', 'PK'])].tolist()
    Surprise = _words[_labels.isin(['PC'])].tolist()
    Anger = _words[_labels.isin(['NA'])].tolist()
    Sad = _words[_labels.isin(['NB', 'NJ', 'NH', 'PF'])].tolist()
    Fear = _words[_labels.isin(['NI', 'NC', 'NG'])].tolist()
    Disgust = _words[_labels.isin(['NE', 'ND', 'NN', 'NK', 'NL'])].tolist()
    # The coarse polarity lists are plain concatenations of the families.
    Positive = Happy + Good + Surprise
    Negative = Anger + Sad + Fear + Disgust
    print('情绪词语列表整理完成')
    
    # 情绪计算函数
    # 这里只是朴素地使用情绪词计数来统计文本的情绪值
    import jieba
    import time
    def emotion_caculate(text):
        """Count emotion-lexicon hits in *text*.

        The text is tokenized with ``jieba.lcut``; every token occurrence that
        appears in one of the module-level word lists (``Positive``,
        ``Negative``, ``Anger``, ``Disgust``, ``Fear``, ``Sad``, ``Surprise``,
        ``Good``, ``Happy``) adds one to that list's counter. A token may be
        counted under several lists.

        Args:
            text: The string to analyse.

        Returns:
            A ``pandas.Series`` indexed by 'length', 'positive', 'negative',
            'anger', 'disgust', 'fear', 'sadness', 'surprise', 'good' and
            'happy', where 'length' is the number of tokens.
        """
        # Local import keeps the block self-contained.
        from collections import Counter

        wordlist = jieba.lcut(text)
        # One pass over the tokens instead of calling wordlist.count() once
        # per distinct word, which was quadratic in the token count.
        token_counts = Counter(wordlist)

        lexicons = {
            'positive': Positive,
            'negative': Negative,
            'anger': Anger,
            'disgust': Disgust,
            'fear': Fear,
            'sadness': Sad,
            'surprise': Surprise,
            'good': Good,
            'happy': Happy,
        }
        totals = dict.fromkeys(lexicons, 0)
        for word, freq in token_counts.items():
            for label, lexicon in lexicons.items():
                if word in lexicon:
                    totals[label] += freq

        emotion_info = {'length': len(wordlist), **totals}
        indexs = ['length', 'positive', 'negative', 'anger', 'disgust','fear','sadness','surprise', 'good', 'happy']
        return pd.Series(emotion_info, index=indexs)
    # Example call on a sample sentence; the returned Series is neither
    # assigned nor printed here.
    emotion_caculate(text='这个国家再对这些制造假冒伪劣食品药品的人手软的话,那后果真的会相当糟糕。坐牢?从快判个死刑!')
    
    • 1
    • 2
    • 3
    • 4
    • 5
    • 6
    • 7
    • 8
    • 9
    • 10
    • 11
    • 12
    • 13
    • 14
    • 15
    • 16
    • 17
    • 18
    • 19
    • 20
    • 21
    • 22
    • 23
    • 24
    • 25
    • 26
    • 27
    • 28
    • 29
    • 30
    • 31
    • 32
    • 33
    • 34
    • 35
    • 36
    • 37
    • 38
    • 39
    • 40
    • 41
    • 42
    • 43
    • 44
    • 45
    • 46
    • 47
    • 48
    • 49
    • 50
    • 51
    • 52
    • 53
    • 54
    • 55
    • 56
    • 57
    • 58
    • 59
    • 60
    • 61
    • 62
    • 63
    • 64
    • 65
    • 66
    • 67
    • 68
    • 69
    • 70
    • 71
    • 72
    • 73
    • 74
    • 75
    • 76
    • 77
    • 78
    • 79
    • 80
    • 81
    • 82
    • 83
    • 84
    • 85
    • 86

    输出结果:
    输出结果

  • 相关阅读:
    Linux服务器如何处理无法删除的文件?
    【老生谈算法】matlab实现卡尔曼滤波算法源码——卡尔曼滤波
    UDP和TCP特点(部分)对比:
    Blog项目切换Markdown编辑器———LayUI弹出层弹出写在页面的内容导致的各种bug
    “蔚来杯“2022牛客暑期多校训练营7 JK题解
    Nginx优化
    塔式服务器介绍
    数据库优化
    系统集成|第十一章(笔记)
    Linux shell脚本进阶使用
  • 原文地址:https://blog.csdn.net/m0_37134868/article/details/134477781