Duyu committed on
Commit
15077ed
·
verified ·
1 Parent(s): a73303c

Upload py2hz.py

Browse files
Files changed (1) hide show
  1. py2hz.py +5 -3
py2hz.py CHANGED
@@ -1,7 +1,7 @@
1
  # _*_ coding:utf-8 _*_
2
  """
3
- @Version : 1.0.0
4
- @Time : 2024年12月27
5
  @Author : DuYu (@duyu09, [email protected])
6
  @File : py2hz.py
7
  @Describe : 基于隐马尔可夫模型(HMM)的拼音转汉字程序。
@@ -66,6 +66,8 @@ def train_hmm(sentences, pinyins, hanzi2id, pinyin2id):
66
  pinyin_seq = [pinyin2id[p] for p in pinyin.split()]
67
 
68
  # 初始状态概率
 
 
69
  start_prob[hanzi_seq[0]] += 1
70
 
71
  # 转移概率
@@ -140,5 +142,5 @@ def pred(model_path='hmm_model.pkl.bz2', pinyin_str='ce4 shi4'):
140
  print('预测结果:', result)
141
 
142
  if __name__ == '__main__':
143
- # train(dataset_path='train.csv', model_path='hmm_model.pkl.bz2')
144
  pred(model_path='hmm_model.pkl.bz2', pinyin_str='hong2 yan2 bo2 ming4')
 
1
  # _*_ coding:utf-8 _*_
2
  """
3
+ @Version : 1.1.0
4
+ @Time : 2024年12月28
5
  @Author : DuYu (@duyu09, [email protected])
6
  @File : py2hz.py
7
  @Describe : 基于隐马尔可夫模型(HMM)的拼音转汉字程序。
 
66
  pinyin_seq = [pinyin2id[p] for p in pinyin.split()]
67
 
68
  # 初始状态概率
69
+ if len(hanzi_seq) == 0:
70
+ continue
71
  start_prob[hanzi_seq[0]] += 1
72
 
73
  # 转移概率
 
142
  print('预测结果:', result)
143
 
144
  if __name__ == '__main__':
145
+ # train(dataset_path='train_o.csv', model_path='hmm_model.pkl.bz2')
146
  pred(model_path='hmm_model.pkl.bz2', pinyin_str='hong2 yan2 bo2 ming4')