Tuesday, June 5, 2007

Split a string into Chinese characters and English (non-Chinese) text.

From Huang, Jiahua

# Module-level result buffers filled by _fenzhas():
#   _zhstr holds the Chinese characters, _asstr holds the non-Chinese ones.
# Both are rebound to fresh lists on every call to _fenzhas().
_zhstr = []
_asstr = []
def _fenzhas(stri):
    """Separate the Chinese and non-Chinese characters of ``stri``.

    The results are stored in the module-level lists ``_zhstr`` and
    ``_asstr``; both are rebound to fresh lists on every call.

    A character counts as "Chinese" when it compares ``>= u'\\u4e00'``.
    This is a simple lower-bound test, so every code point at or above
    U+4E00 lands in ``_zhstr``, not only CJK ideographs.

    Separator behaviour (kept exactly as originally written):

    * ``_zhstr`` receives a single leading ``' '`` only when the very
      first character of ``stri`` is Chinese.
    * ``_asstr`` receives a ``' '`` before every non-Chinese character
      that directly follows a Chinese one.

    NOTE(review): the two separator rules are asymmetric (later Chinese
    runs are not space-separated) - confirm whether that is intended
    before changing it, since callers may rely on the current output.

    Args:
        stri: the text to split (a unicode string).

    Returns:
        0 when ``stri`` is empty or its first element cannot be indexed
        or compared with a unicode character (nothing is split and both
        result lists are left empty); otherwise ``None``.
    """
    global _asstr
    global _zhstr
    _zhstr = []
    _asstr = []

    first_chinese = u'\u4e00'

    # Probe the first element: bail out early on empty input or on input
    # whose elements are not characters (e.g. bytes on Python 3).
    try:
        stri[0] >= first_chinese
    except (LookupError, TypeError):
        return 0

    # state: 0 = nothing seen yet, 1 = last char was Chinese,
    #        2 = last char was non-Chinese.
    state = 0
    for ch in stri:
        if ch >= first_chinese:
            if state == 0:
                _zhstr.append(' ')
            _zhstr.append(ch)
            state = 1
        else:
            if state == 1:
                _asstr.append(' ')
            _asstr.append(ch)
            state = 2

No comments: