python - word2vec on POS tags -


please check code of getting pos vectors.instead of getting pos tag vectors getting vectors of alphabets in pos.e.g instead of getting pos tags vectors cc,dt,prp etc getting vectors of c,d , p.

#get word , pos tagger def get_pos_tagger(self, document):      # tokenizer     tokens = nltk.word_tokenize(document)     # pos tagger     postagger = nltk.pos_tag(tokens=tokens)     tags = []     (word, tag) in postagger:         tags.append(tag)      return tags   def get_tag_and_training_data(self):     tags=[]     documents=[]     line_counter=1      open(self.filename) csvfile:         spamreader = csv.reader(csvfile, delimiter=",")         line in spamreader:             #initialize token list line             tags.append(int(line[0]))             documents.append(line[1].lower() + " " + line[2].lower())      return tags,documents   # build pos model def buildposmodel(self):     tags, documents = self.get_tag_and_training_data()     sentences = []      document in documents:      sentences += self.get_pos_tagger(document)      print(sentences)     modelpos = gensim.models.word2vec(sentences=sentences, size=100, min_count=1, window=5, workers=cores)     modelpos.wv.save_word2vec_format('word2vecposmodel.bin', binary=false)     return modelpos 


Comments

Popular posts from this blog

ubuntu - PHP script to find files of certain extensions in a directory, returns populated array when run in browser, but empty array when run from terminal -

php - How can i create a user dashboard -

javascript - How to detect toggling of the fullscreen-toolbar in jQuery Mobile? -