python - word2vec on POS tags -
Please check my code for getting POS-tag vectors. Instead of getting vectors for the POS tags, I am getting vectors for the individual letters in the tags: e.g., instead of vectors for the tags CC, DT, PRP, etc., I get vectors for c, d, p.
# get word, POS tagger
def get_pos_tagger(self, document):
    """Return the POS tags of the tokens in ``document``, in token order.

    Args:
        document: Raw text to tokenize and tag.

    Returns:
        A list with one tag string per token; the words themselves are
        discarded.
    """
    tokens = nltk.word_tokenize(document)
    return [tag for _word, tag in nltk.pos_tag(tokens=tokens)]


def get_tag_and_training_data(self):
    """Read the labels and documents from the CSV file at ``self.filename``.

    Column 0 of each row is parsed as an integer label. Columns 1 and 2 are
    lower-cased and joined with a single space to form the document text.
    Completely blank rows are skipped.

    Returns:
        A ``(tags, documents)`` tuple of parallel lists.

    Raises:
        ValueError: If column 0 of a row is not an integer.
        IndexError: If a non-blank row has fewer than three columns.
    """
    tags = []
    documents = []
    # newline="" lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(self.filename, newline="") as csvfile:
        for line in csv.reader(csvfile, delimiter=","):
            if not line:
                # csv.reader yields [] for a blank line (e.g. a trailing one).
                continue
            tags.append(int(line[0]))
            documents.append(line[1].lower() + " " + line[2].lower())
    return tags, documents


# build POS model
def buildposmodel(self):
    """Train a Word2Vec model over POS-tag sequences and save it as text.

    Each document becomes one "sentence" whose tokens are its POS tags, so
    the learned vocabulary consists of whole tags.

    Returns:
        The trained model. Its vectors are also written to
        ``word2vecposmodel.bin`` in the non-binary word2vec format.
    """
    _labels, documents = self.get_tag_and_training_data()
    # Word2Vec needs an iterable of token lists. Extending one flat list with
    # the tags (``sentences += tags``) makes every tag string a "sentence",
    # which is then iterated character by character -- that is why vectors
    # came out for single letters instead of whole tags.
    sentences = [self.get_pos_tagger(document) for document in documents]
    print(sentences)
    # NOTE(review): ``size`` is the gensim < 4.0 keyword; gensim 4.x renamed
    # it to ``vector_size`` -- confirm against the installed version.
    modelpos = gensim.models.Word2Vec(
        sentences=sentences,
        size=100,
        min_count=1,
        window=5,
        workers=cores,
    )
    modelpos.wv.save_word2vec_format("word2vecposmodel.bin", binary=False)
    return modelpos
Comments
Post a Comment