-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstream_get.py
More file actions
112 lines (87 loc) · 2.62 KB
/
stream_get.py
File metadata and controls
112 lines (87 loc) · 2.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#---Streaming Get-------------
import html
from mastodon import Mastodon, StreamListener
import MeCab
import json, os, re, random
import re
# Path of the JSON file that persists the word-triple dictionary between runs.
dict_file = "test.json"
# Nested mapping: first word -> second word -> third word -> occurrence count.
dic = {}
if os.path.exists(dict_file):
    # Use a context manager so the handle is closed deterministically, and
    # read with the same encoding the file is written with (utf-8).
    with open(dict_file, "r", encoding="utf-8") as _dict_fp:
        dic = json.load(_dict_fp)
def mecab_analisys(text):
    """Split ``text`` into morphemes with MeCab.

    Returns a list of ``(surface, word_class)`` tuples in node order, where
    ``word_class`` is the first comma-separated field of the node's feature
    string. Nodes whose class is ``BOS/EOS`` are left out.
    """
    mecab = MeCab.Tagger('-Ochasen')
    # The result of this call is discarded on purpose.
    # NOTE(review): presumably the usual warm-up call that keeps node
    # surfaces valid in the Python 3 binding -- confirm before removing.
    mecab.parse("")
    pairs = []
    node = mecab.parseToNode(text)
    while node:
        surface, pos = node.surface, node.feature.split(',')[0]
        if pos != u'BOS/EOS':
            pairs.append((surface, pos))
        node = node.next
    return pairs
def register_dic(words):
    """Record every consecutive word triple of ``words`` and save the result.

    ``words`` is an iterable of sequences whose first item is the word text
    (the remaining items are ignored). A sliding window of three words is
    passed to ``set_word3`` for counting in the module-level ``dic``. The
    window starts with the marker ``"!"`` and is reset to it after a word
    equal to ``"。"`` or ``"?"``, so triples never span two sentences.
    Empty strings and bare line breaks are skipped.

    After all words are processed, ``dic`` is written to ``dict_file`` as
    JSON, replacing the previous file content.
    """
    # ``dic`` is only mutated, never rebound, so no ``global`` is required.
    window = ["!"]
    for entry in words:
        word = entry[0]
        if word in ("", "\r\n", "\n"):
            continue
        window.append(word)
        if len(window) < 3:
            continue
        # Keep only the three most recent words.
        window = window[-3:]
        set_word3(dic, window)
        if word in ("。", "?"):
            # Sentence finished: start the next one from the marker.
            window = ["!"]
    # Close the file explicitly so the JSON is fully flushed to disk instead
    # of relying on garbage collection of an anonymous file object.
    with open(dict_file, "w", encoding="utf-8") as dict_fp:
        json.dump(dic, dict_fp)
def set_word3(dic, s3):
    """Count one occurrence of the word triple ``s3`` in ``dic``.

    ``s3`` must unpack into exactly three items. ``dic`` is updated in place
    as a nested mapping ``first -> second -> third -> count``; missing levels
    are created on demand. Returns ``None``.
    """
    first, second, third = s3
    followers = dic.setdefault(first, {}).setdefault(second, {})
    followers[third] = followers.get(third, 0) + 1
def remove_tag(html):
    """Return ``html`` with every ``<...>`` tag deleted.

    A tag is a ``<``, one or more characters other than ``>``, then ``>``.
    The parameter name shadows the imported ``html`` module inside this
    function only.
    """
    tag_pattern = re.compile(r"<[^>]+?>")
    return tag_pattern.sub('', html)
def to_oneline(html):
    """Flatten ``html`` onto one line.

    ``<br />`` and the paragraph boundary ``</p><p>`` each become a single
    space, and every newline character becomes the two-character sequence
    backslash + ``n``. Replacements are applied in that order.
    """
    substitutions = (
        ("<br />", ' '),
        ("</p><p>", ' '),
        ('\n', '\\n'),
    )
    for old, new in substitutions:
        html = html.replace(old, new)
    return html
def remove_mention(content):
    """Return ``content`` with every ``@`` character deleted.

    Only the ``@`` sign itself is dropped; the account name that follows it
    stays in the text.
    """
    pieces = content.split("@")
    return "".join(pieces)
def remove_hashtag(content):
    """Return ``content`` with every ``#`` character deleted.

    Only the ``#`` sign itself is dropped; the tag text that follows it
    stays in the content.
    """
    drop_hash = {ord("#"): None}
    return content.translate(drop_hash)
def remove_image(content, status):
    """Return ``content`` with the text URLs of attached media removed.

    ``status`` must expose a ``media_attachments`` iterable. For each
    attachment, every occurrence of its ``text_url`` in ``content`` is
    deleted. Attachments whose ``text_url`` is missing, ``None`` or empty
    are skipped; previously a ``None`` value raised ``TypeError`` from
    ``str.replace`` and a missing attribute raised ``AttributeError``.
    """
    for media in status.media_attachments:
        text_url = getattr(media, "text_url", None)
        if text_url:
            content = content.replace(text_url, "")
    return content
# Module-level Mastodon API client for the mstdn-workers.com instance; it is
# used at the bottom of the file to open the stream.
# NOTE(review): client_id / access_token look like credential file names
# rather than literal secrets -- confirm both files exist before running.
mastodon = Mastodon(
client_id="my_clientcred_workers.txt",
access_token="my_usercred_workers.txt",
api_base_url = "https://mstdn-workers.com"
)
class MyStreamListener(StreamListener):
    """Stream listener that feeds each new status into the word dictionary."""

    def __init__(self):
        super(MyStreamListener, self).__init__()

    def handle_stream(self, response):
        """Delegate to the base implementation; errors propagate unchanged."""
        # The former ``try/except: raise`` wrapper re-raised everything, so a
        # plain delegation has the same effect.
        super().handle_stream(response)

    def on_update(self, status):
        """Clean the text of ``status``, print it and register its words.

        The status content is flattened to one line, stripped of tags,
        HTML-unescaped, and cleaned of ``@``, ``#`` and media URLs. A trailing
        ``"。"`` is appended when missing so the text always ends a sentence,
        then the text is tokenised and the words are registered.
        """
        content = html.unescape(remove_tag(to_oneline(status['content'])))
        content = remove_mention(content)
        content = remove_hashtag(content)
        content = remove_image(content, status)
        print(content)
        if not content:
            # Nothing left after cleaning (e.g. a media-only post); indexing
            # ``content[-1]`` here used to raise IndexError.
            return
        if not content.endswith("。"):
            content += "。"
        words = mecab_analisys(content)
        register_dic(words)

    def on_delete(self, status_id):
        """Ignore deletion events."""
        pass
# Create the listener and hand it to the client's local-timeline stream, so
# incoming events reach the MyStreamListener callbacks.
# NOTE(review): presumably this call blocks for as long as the stream is
# open -- confirm against the Mastodon.py streaming documentation.
listener = MyStreamListener()
mastodon.stream_local(listener)