Python汉字转拼音
发布时间:2020-05-24 23:28:39 所属栏目:Python 来源:互联网
导读:Python汉字转拼音
|
下面是脚本之家 jb51.cc 通过网络收集整理的代码片段。 脚本之家小编现在分享给大家,也给大家做个参考。
# -*- coding: utf-8 -*-
# ------------------------------------------------------------
# Script Name: convert.py
# Creation Date: 2010-09-21 02:12
# Last Modified: 2011-11-12 18:38:13
# Copyright (c)2011,DDTCMS Project
# Purpose: This file used for DDTCMS Project
# ------------------------------------------------------------
#####################################
# Written by caocao #
# Modified by [emailprotected] #
# [emailprotected] #
# http://nethermit.yeah.net #
#####################################
# python.
import io
import os
import re
import string
import sys
class CConvert:
def __init__(self):
self.has_shengdiao = False
self.just_shengmu = False
self.spliter = '-'
"Load data table"
try:
fp=open(os.path.join(settings.PROJECT_DIR,'utils','convert-utf-8.txt'))
except IOError:
print "Can't load data from convert-utf-8.txtnPlease make sure this file exists."
sys.exit(1)
else:
self.data=fp.read().decode("utf-8")# decoded data to unicode
fp.close()
def convert1(self,strIn):
"Convert Unicode strIn to PinYin"
length,strOutKey,strOutValue,i=len(strIn),"",0
while i<length:
code1 =ord(strIn[i:i+1])
if code1>=0x4e02 and code1<=0xe863:
strTemp = self.getIndex(strIn[i:i+1])
if not self.has_shengdiao:
strTemp = strTemp[:-1]
strLength = len(strTemp)
if strLength<1:strLength=1
strOutKey += string.center(strIn[i:i+1],strLength)+" "
strOutValue += self.spliter + string.center(strTemp,strLength) + self.spliter
else:#ascii code;
strOutKey+=strIn[i:i+1]+" "
strOutValue+=strIn[i:i+1] + ' '
i+=1
#############################
#txlist = utf8String.split()
#out=convert.convert(utf8String)
#l=[]
#for t in map(convert.convert,txlist):
# l.append(t[0])
#v = '-'.join(l).replace(' ','').replace(u'--','-').strip('-')
#############################
return [strOutValue,strOutKey]
def getIndex(self,strIn):
"Convert single Unicode to PinYin from index"
if strIn==' ':return self.spliter
if set(strIn).issubset("'"`[emailprotected]#$%^&*()=+[]{}|;:,.<>/?"):return self.spliter # or return ""
if set(strIn).issubset("-—!##%%&&()*,、。:;?? @@\{{|}}~~‘’“”《》【】++==×¥·… ".decode("utf-8")):return ""
pos=re.search("^"+strIn+"([0-9a-zA-Z]+)",self.data,re.M)
if pos==None:
return strIn
else:
if not self.just_shengmu:
return pos.group(1)
else:
return pos.group(1)[:1]
def convert(self,strIn):
"Convert Unicode strIn to PinYin"
if self.spliter != '-' and self.spliter !='_' and self.spliter != '' and self.spliter != ' ':
self.spliter = '-'
pinyin_list=[]
for c in strIn :
pinyin_list.append(self.getIndex(c))
pinyin=''
for p in pinyin_list:
if p==' ':
pinyin+= self.spliter
continue
if len(p)<2:# only shengmu,just get one char,or number
#if p.isdigit():
# pinyin += p + ' '
#else:
# pinyin += p + ' '
pinyin += p + ' '
else:
if not self.has_shengdiao: p = p[:-1]
pinyin += self.spliter + p + self.spliter
pinyin = pinyin.replace(' ','')
.replace(self.spliter+self.spliter,self.spliter)
.strip(self.spliter+' ').replace(self.spliter+self.spliter,self.spliter)
return pinyin
以上是脚本之家(jb51.cc)为你收集整理的全部代码内容,希望文章能够帮你解决所遇到的程序开发问题。 如果觉得脚本之家网站内容还不错,欢迎将脚本之家网站推荐给程序员好友。 (编辑:安卓应用网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |
