把结果存进数据库再查询也很慢,放在内存里缓存起来性能最好。下面代码的大致思路是:converter 先把每个数字首次出现的位置统计到数组里,然后把数组写入文件;finder 读入该文件初始化数组,再进行查找。代码没有仔细调试(因为太烧机器了),有兴趣的同学可以继续完善:
1.
converter.py
```python
# -*- coding:utf-8 -*-
#!/usr/bin/python3
import datetime
class PIConverter:
    """Index the first occurrence of every number in a range within a digit file.

    ``positions[n - minNum]`` holds the 0-based offset, counted from the first
    character after the two-character "3." prefix of the source file, at which
    the decimal spelling of ``n`` first appears. Numbers that have not been
    seen hold -1 (0 cannot be the marker because it is a valid offset).
    """

    _DIGITS = "0123456789"
    _NOT_FOUND = -1
    # Print a progress line each time this many characters have been read.
    _REPORT_EVERY = 1024 * 1024 * 8

    def __init__(self, minNum=100000, maxNum=99999999):
        """Allocate one slot per number in the inclusive range [minNum, maxNum]."""
        self.minNum = minNum
        self.maxNum = maxNum
        self.positions = [self._NOT_FOUND] * (self.maxNum + 1 - self.minNum)

    def convert(self, srcFile, dstFile, readSize=1024 * 8):
        """Scan ``srcFile`` and write the resulting position table to ``dstFile``.

        The first two characters of ``srcFile`` (the "3." prefix) are skipped
        and the rest is read in chunks of ``readSize`` characters. ``dstFile``
        receives one line per number from ``minNum`` to ``maxNum`` in order,
        each holding that number's first offset or -1.

        Raises:
            ValueError: if ``readSize`` is smaller than 1.
            OSError: if either file cannot be opened.
        """
        if readSize < 1:
            raise ValueError("readSize must be at least 1")

        min_len = len(str(self.minNum))
        max_len = len(str(self.maxNum))
        # A window starting in the last (max_len - 1) characters of a chunk may
        # continue into the next chunk, so those start positions are deferred.
        carry_len = max_len - 1

        starttime = datetime.datetime.now()
        readed = 0
        next_report = self._REPORT_EVERY

        with open(srcFile, 'r') as fsrc:
            fsrc.read(2)  # skip the leading "3."
            tail = ""
            base = 0  # file offset of buf[0]
            while True:
                chunk = fsrc.read(readSize)
                buf = tail + chunk
                if chunk:
                    limit = max(len(buf) - carry_len, 0)
                else:
                    # End of file: nothing more will arrive, so every
                    # remaining start position is processed now.
                    limit = len(buf)
                for i in range(limit):
                    self._record_numbers_at(buf, i, base + i, min_len, max_len)
                if not chunk:
                    break
                tail = buf[limit:]
                base += limit
                readed += len(chunk)
                if readed >= next_report:
                    print("total read: {}, time used: {}s".format(readed, int((datetime.datetime.now() - starttime).total_seconds())))
                    next_report += self._REPORT_EVERY

        print("total read: {}, time used: {}s".format(readed, int((datetime.datetime.now() - starttime).total_seconds())))
        print("done")

        with open(dstFile, 'w') as fdst:
            fdst.writelines("{}\n".format(pos) for pos in self.positions)

    def _record_numbers_at(self, buf, start, file_pos, min_len, max_len):
        """Record ``file_pos`` for each in-range number spelled from ``buf[start]``."""
        digits = self._DIGITS
        first = buf[start]
        if first not in digits:
            return
        end = start + 1
        if first != "0":
            # Extend over the run of digits, capped at the widest number.
            stop = min(start + max_len, len(buf))
            while end < stop and buf[end] in digits:
                end += 1
        # A leading "0" can only spell the number 0 itself, so the run stays
        # one character long and longer windows are never considered.
        positions = self.positions
        for length in range(min_len, end - start + 1):
            num = int(buf[start:start + length])
            if num > self.maxNum:
                break
            if num >= self.minNum:
                index = num - self.minNum
                if positions[index] == self._NOT_FOUND:
                    positions[index] = file_pos

    def find(self, n):
        """Return the first offset of ``n``, or -1 if out of range or not seen."""
        if n < self.minNum or n > self.maxNum:
            return -1
        return self.positions[n - self.minNum]
piConverter = PIConverter()
# Condense the statistics gathered from the digit file into a smaller file.
piConverter.convert("./pi-billion.txt", "./pi-position.txt")
# Building the index in the converter is very slow, so it is best to run
# piConverter.convert once to produce the smaller file and let finder.py
# use that file for initialisation and look-ups.
# print("141592:", piConverter.find(141592))
# print("415926:", piConverter.find(415926))
```
2.
finder.py
```python
# -*- coding:utf-8 -*-
#!/usr/bin/python3
class PIFinder:
    """Look up precomputed positions loaded from a text file.

    The file is read as one integer per line; line ``k`` (0-based) is stored
    as the position of the number ``minNum + k``.
    """

    def __init__(self, fname, minNum=100000, maxNum=99999999):
        """Load the position table for the inclusive range [minNum, maxNum].

        Entries whose line is blank or missing from ``fname`` stay at -1,
        the same value ``find`` returns for out-of-range numbers (0 is not
        used because it is a valid position). Lines beyond the size of the
        range are ignored.

        Raises:
            OSError: if ``fname`` cannot be opened.
            ValueError: if a non-blank line is not an integer.
        """
        self.minNum = minNum
        self.maxNum = maxNum
        size = self.maxNum + 1 - self.minNum
        self.positions = [-1] * size
        with open(fname, 'r') as f:
            for i, line in enumerate(f):
                if i >= size:
                    break
                text = line.strip()
                if text:
                    self.positions[i] = int(text)

    def find(self, n):
        """Return the stored position of ``n``, or -1 if ``n`` is out of range."""
        if n < self.minNum or n > self.maxNum:
            return -1
        return self.positions[n - self.minNum]
# Load the precomputed table, then print the stored position of two sample numbers.
piFinder = PIFinder("./pi-position.txt")
for label, number in (("141592:", 141592), ("415926:", 415926)):
    print(label, piFinder.find(number))
```