使用python删除曲线下方的数据点
|
我需要在 Python 中将一些理论数据与实际数据进行比较。
理论曲线和数据点是两个长度不同的数组。我可以尝试用比较粗略的方式移除这些点,例如,可以用下面的方式检测出上方的第一个点: data2[(data2.redshift<0.4)&data2.dmodulus>1]
rec.array([('1997o',0.374,1.0203223485103787,0.44354759972859786)],dtype=[('SN_name','|S10'),('redshift','<f8'),('dmodulus','<f8'),('dmodulus_error','<f8')])
但我想用一种不那么粗略的方式。那么,有人能帮我找到一个简单的方法来剔除这些有问题的点吗?谢谢!
解决方法
这个做法可能有些小题大做,并且是基于您的评论。
我会做以下事情:
> 截断数据集,使其 x 值位于理论数据集的最小值和最大值之间。
我认为下面这个脚本与您想要的比较接近,希望它能帮助您实现目标:
import numpy as np
import scipy.interpolate as interpolate
import matplotlib.pyplot as plt
# make up data
def makeUpData(numPoints=150, seed=None):
    """Generate synthetic noisy data and a coarse theory curve for the demo.

    Many more data points (x, y, yerr) are produced than theory points
    (x, y).  The theory uncertainty is a single constant "sigma" in y that
    applies around every theory (x, y) value.

    Parameters
    ----------
    numPoints : int, optional
        Number of random data points to draw (default 150).
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` is used so the
        output is reproducible.  When ``None`` (default) the global NumPy
        random state is used.

    Returns
    -------
    tuple
        ``(dataX, dataY, dataErr, theoryX, theoryY, theoryErr)`` where the
        three ``data*`` entries are arrays of length ``numPoints``,
        ``theoryX`` is ``np.arange(0, 1, 0.1)``, ``theoryY`` is
        ``1.5 * theoryX**2`` and ``theoryErr`` is the float ``0.5``.
    """
    # Draw from the global generator unless a reproducible seed is requested.
    rand = np.random.rand if seed is None else np.random.RandomState(seed).rand

    # Squaring concentrates the x values near zero and spreads them to ~1.21,
    # so some data deliberately falls outside the theory x-range.
    dataX = (rand(numPoints) * 1.1) ** 2
    dataY = (1.5 * dataX + rand(numPoints) ** 2) * dataX
    dataErr = rand(numPoints) * dataX * 1.3

    theoryX = np.arange(0, 1, 0.1)
    theoryY = theoryX * theoryX * 1.5
    theoryErr = 0.5
    return dataX, dataY, dataErr, theoryX, theoryY, theoryErr
def makeSameXrange(theoryX, dataX, dataY):
    """Truncate the data so every x lies strictly inside the theory x-range.

    Parameters
    ----------
    theoryX : array-like
        x values of the theory curve; only its minimum and maximum are used.
    dataX, dataY : array-like
        Data coordinates of equal length.

    Returns
    -------
    tuple of ndarray
        ``(dataX, dataY)`` restricted to the points that satisfy
        ``theoryX.min() < dataX < theoryX.max()``.  Both bounds are
        exclusive, so points exactly on either edge are dropped.
    """
    theoryX = np.asarray(theoryX)
    dataX = np.asarray(dataX)
    dataY = np.asarray(dataY)

    minT, maxT = theoryX.min(), theoryX.max()
    # A single boolean mask replaces the two chained np.where passes.
    inRange = (dataX > minT) & (dataX < maxT)
    return dataX[inRange], dataY[inRange]
# take 'theory' and get values at every 'data' x point
def _legacyModuleValue(name):
    """Return the module-level variable *name* for the legacy call forms.

    The functions below used to read some of their inputs from module scope.
    That behaviour is kept as a fallback when the corresponding optional
    argument is omitted.
    """
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            "%s was not passed and no module-level %r is defined" % (name, name)
        )


def theoryYatDataX(theoryX, dataX, theoryY=None):
    """Interpolate the theory curve at the data x positions.

    Parameters
    ----------
    theoryX : array-like
        x values of the theory curve.
    dataX : array-like
        x positions at which the theory is evaluated.  Only the entries
        strictly below ``max(theoryX)`` are used, so the result can be
        shorter than ``dataX``.
    theoryY : array-like, optional
        y values of the theory curve.  When omitted, the module-level
        ``theoryY`` is used (legacy call form).

    Returns
    -------
    ndarray
        Linearly interpolated theory y value for each retained x.
    """
    if theoryY is None:
        theoryY = _legacyModuleValue("theoryY")
    theoryX = np.asarray(theoryX)
    dataX = np.asarray(dataX)
    f = interpolate.interp1d(theoryX, theoryY)
    return f(dataX[dataX < theoryX.max()])


# collect valid points
def findInlierSet(dataX, interpTheoryY, thoeryErr, dataY=None):
    """Return the data points that lie strictly inside the theory band.

    A point is an inlier when
    ``interpTheoryY - thoeryErr < dataY < interpTheoryY + thoeryErr``.
    Points exactly on a band edge are not inliers.

    Parameters
    ----------
    dataX : array-like
        Data x values.
    interpTheoryY : array-like
        Theory y value at every data x (same length as ``dataX``).
    thoeryErr : float or array-like
        Half-width of the accepted band around the theory curve.  (The
        misspelled name is kept so keyword callers continue to work.)
    dataY : array-like, optional
        Data y values.  When omitted, the module-level ``dataY`` is used
        (legacy call form).

    Returns
    -------
    tuple of ndarray
        ``(inlierX, inlierY)``: the coordinates (not indices) of the inliers.
    """
    if dataY is None:
        dataY = _legacyModuleValue("dataY")
    dataX = np.asarray(dataX)
    dataY = np.asarray(dataY)
    interpTheoryY = np.asarray(interpTheoryY)

    inside = (dataY < interpTheoryY + thoeryErr) & (dataY > interpTheoryY - thoeryErr)
    return dataX[inside], dataY[inside]


def findOutlierSet(dataX, thoeryErr, dataY=None, interpTheoryY=None):
    """Return the data points that lie strictly outside the theory band.

    Parameters
    ----------
    dataX : array-like
        Data x values.
    thoeryErr : float or array-like
        Half-width of the accepted band around the theory curve.  (The
        misspelled name is kept so keyword callers continue to work.)
    dataY : array-like, optional
        Data y values.  When omitted, the module-level ``dataY`` is used
        (legacy call form).
    interpTheoryY : array-like, optional
        Theory y value at every data x.  When omitted, the module-level
        ``interpTheoryY`` is used (legacy call form).

    Returns
    -------
    tuple of ndarray
        ``(aboveX, aboveY, belowX, belowY)``: points with
        ``dataY > interpTheoryY + thoeryErr`` followed by points with
        ``dataY < interpTheoryY - thoeryErr``.
    """
    if dataY is None:
        dataY = _legacyModuleValue("dataY")
    if interpTheoryY is None:
        interpTheoryY = _legacyModuleValue("interpTheoryY")
    dataX = np.asarray(dataX)
    dataY = np.asarray(dataY)
    interpTheoryY = np.asarray(interpTheoryY)

    above = dataY > interpTheoryY + thoeryErr
    below = dataY < interpTheoryY - thoeryErr
    return dataX[above], dataY[above], dataX[below], dataY[below]


if __name__ == "__main__":
    dataX, dataY, dataErr, theoryX, theoryY, theoryErr = makeUpData()

    # Restrict the data to the theory x-range so the interpolation is defined
    # for every remaining point.
    TruncDataX, TruncDataY = makeSameXrange(theoryX, dataX, dataY)
    interpTheoryY = theoryYatDataX(theoryX, TruncDataX, theoryY=theoryY)

    inDataX, inDataY = findInlierSet(
        TruncDataX, interpTheoryY, theoryErr, dataY=TruncDataY
    )
    outUpX, outUpY, outDownX, outDownY = findOutlierSet(
        TruncDataX, theoryErr, dataY=TruncDataY, interpTheoryY=interpTheoryY
    )

    fig = plt.figure()

    # Top panel: all raw data with error bars, theory curve and +/- band.
    ax = fig.add_subplot(211)
    ax.errorbar(dataX, dataY, yerr=dataErr, fmt='.', color='k')
    ax.plot(theoryX, theoryY, 'r-')
    ax.plot(theoryX, theoryY + theoryErr, 'r--')
    ax.plot(theoryX, theoryY - theoryErr, 'r--')
    ax.set_xlim(0, 1.4)
    ax.set_ylim(-.5, 3)

    # Bottom panel: inliers (black), points above the band (blue) and below
    # the band (red), drawn against the same theory curve and axis limits.
    ax = fig.add_subplot(212)
    ax.plot(inDataX, inDataY, 'ko')
    ax.plot(outUpX, outUpY, 'bo')
    ax.plot(outDownX, outDownY, 'ro')
    ax.plot(theoryX, theoryY, 'r-')
    ax.plot(theoryX, theoryY + theoryErr, 'r--')
    ax.plot(theoryX, theoryY - theoryErr, 'r--')
    ax.set_xlim(0, 1.4)
    ax.set_ylim(-.5, 3)

    fig.savefig('findInliers.png')
这张图就是结果: (编辑:安卓应用网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时联系站长删除相关内容! |
