给定一组(X,Y)形式的坐标,任务是求出由这些点确定的最小二乘回归线(least-squares regression line)。
In statistics, Linear Regression is a linear approach to model the relationship between a scalar response (or dependent variable), say Y, and one or more explanatory variables (or independent variables), say X.
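Regression Line: If our data shows a linear relationship between X and Y, then the straight line which best describes the relationship is the regression line. It is the straight line that minimizes the sum of the squared vertical distances between the data points and the line (the least-squares criterion); it does not have to pass through any of the points.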
例子:
Input: X = [95, 85, 80, 70, 60]
Y = [90, 80, 70, 65, 60]
Output: Y = 5.685 + 0.863*X
Explanation:
The graph of the data given below is:
X = [95, 85, 80, 70, 60]
Y = [90, 80, 70, 65, 60]
The regression line obtained is Y = 5.685 + 0.863*X
The graph shows that the regression line passes as close as possible to the points overall: it minimizes the sum of the squared vertical distances from the points to the line.
Input: X = [100, 95, 85, 80, 70, 60]
Y = [90, 95, 80, 70, 65, 60]
Output: Y = 4.007 + 0.890*X
方法:
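A regression line is given as Y = a + b*X, where the slope b and the intercept a are given by:
$$b = \frac{n\sum_{i=1}^{n} x_i y_i - \left(\sum_{i=1}^{n} x_i\right)\left(\sum_{i=1}^{n} y_i\right)}{n\sum_{i=1}^{n} x_i^2 - \left(\sum_{i=1}^{n} x_i\right)^2}$$
$$a = \bar{y} - b\,\bar{x}$$
where n is the number of data points and x̄ and ȳ are the means of x and y respectively.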
- 要找到回归线,我们需要找到a和b。
- 计算a,由 a = ȳ − b·x̄ 给出(其中 x̄、ȳ 为实数均值,不能取整;计算a之前需先求出b)。
- 计算b,由下式给出:b = (nΣxᵢyᵢ − ΣxᵢΣyᵢ) ÷ (nΣxᵢ² − (Σxᵢ)²)。
- 将a和b的值放在回归线方程中。
下面是上述方法的实现。
C++
// C++ program to find the
// regression line
#include <cstdio>
#include <iostream>
#include <numeric>
using namespace std;
// Computes the slope b of the least-squares regression line Y = a + b*X
// from the first n paired observations (x[i], y[i]):
//
//     b = (n*sum(x*y) - sum(x)*sum(y)) / (n*sum(x*x) - sum(x)^2)
//
// All sums are accumulated in 64-bit integers so that products such as
// x[i] * x[i] and n * sum(x*y) cannot overflow int.
//
// The denominator is zero when n <= 1 or when every x[i] has the same
// value; the division then yields inf/NaN on IEEE-754 platforms.
double calculateB(int x[], int y[], int n)
{
    long long sumX = 0;   // sum of x[i]
    long long sumY = 0;   // sum of y[i]
    long long sumXY = 0;  // sum of x[i] * y[i]
    long long sumXX = 0;  // sum of x[i] * x[i]

    for (int i = 0; i < n; i++)
    {
        // Widen before multiplying so the product is formed in 64 bits.
        const long long xi = x[i];
        const long long yi = y[i];

        sumX += xi;
        sumY += yi;
        sumXY += xi * yi;
        sumXX += xi * xi;
    }

    const long long numerator = n * sumXY - sumX * sumY;
    const long long denominator = n * sumXX - sumX * sumX;

    return static_cast<double>(numerator) / static_cast<double>(denominator);
}
// Prints the least-squares regression line Y = a + b*X fitted to the
// first n paired observations (X[i], Y[i]).
//
// The slope b comes from calculateB(); the intercept is
// a = mean(Y) - b * mean(X).
//
// n must be positive. Output goes to standard output in the form
//     Regression line:
//     Y = <a> + <b>*X
// with both coefficients printed to three decimal places.
void leastRegLine( int X[], int Y[], int n)
{
    // Slope of the fitted line
    const double b = calculateB(X, Y, n);

    // The means must be real-valued: truncating them to int shifts the
    // intercept whenever a sum is not an exact multiple of n. The sums
    // are taken in long long so they cannot overflow int.
    const double meanX = std::accumulate(X, X + n, 0LL) / static_cast<double>(n);
    const double meanY = std::accumulate(Y, Y + n, 0LL) / static_cast<double>(n);

    // Intercept of the fitted line
    const double a = meanY - b * meanX;

    // std::endl flushes cout before printf writes, keeping the two
    // lines in order.
    std::cout << "Regression line:" << std::endl;
    std::printf("Y = %.3f + %.3f*X", a, b);
}
// Driver code: fits and prints the regression line for a small sample.
int main()
{
    // Sample observations; xs[i] is paired with ys[i]
    int xs[] = { 95, 85, 80, 70, 60 };
    int ys[] = { 90, 80, 70, 65, 60 };

    // Number of observations, derived from the array itself
    const int count = sizeof xs / sizeof *xs;

    leastRegLine(xs, ys, count);
    return 0;
}
// This code is contributed by PrinciRaj1992
Java
// Java program to find the
// regression line
import java.util.Arrays;
public class GFG {

    /**
     * Computes the slope b of the least-squares regression line
     * Y = a + b*X:
     *
     *     b = (n*sum(x*y) - sum(x)*sum(y)) / (n*sum(x*x) - sum(x)^2)
     *
     * Sums are accumulated as long so that products such as
     * x[i] * x[i] cannot overflow int. When all x values are equal
     * the denominator is zero and the result is NaN or infinite.
     *
     * @param x the X values
     * @param y the Y values; y[i] is paired with x[i]
     * @return the slope of the fitted line
     */
    private static double calculateB(
        int[] x, int[] y)
    {
        int n = x.length;

        long sumX = 0;   // sum of x[i]
        long sumY = 0;   // sum of y[i]
        long sumXY = 0;  // sum of x[i] * y[i]
        long sumXX = 0;  // sum of x[i] * x[i]

        for (int i = 0; i < n; i++) {
            sumX += x[i];
            sumY += y[i];
            // Widen before multiplying so the product is formed in 64 bits
            sumXY += (long) x[i] * y[i];
            sumXX += (long) x[i] * x[i];
        }

        return (double) (n * sumXY - sumX * sumY)
            / (n * sumXX - sumX * sumX);
    }

    /**
     * Prints the least-squares regression line Y = a + b*X for the
     * paired observations (X[i], Y[i]) to standard output, with both
     * coefficients shown to three decimal places.
     *
     * @param X the X values
     * @param Y the Y values; must have the same length as X
     * @throws IllegalArgumentException if the arrays are empty or
     *         differ in length
     */
    public static void leastRegLine(
        int X[], int Y[])
    {
        if (X.length == 0 || X.length != Y.length) {
            throw new IllegalArgumentException(
                "X and Y must be non-empty and of equal length");
        }

        // Slope of the fitted line
        double b = calculateB(X, Y);

        // The means must be real-valued: truncating them to int shifts
        // the intercept whenever a sum is not an exact multiple of n.
        // The arrays are known to be non-empty, so the average exists.
        double meanX = Arrays.stream(X).average().getAsDouble();
        double meanY = Arrays.stream(Y).average().getAsDouble();

        // Intercept of the fitted line
        double a = meanY - b * meanX;

        System.out.println("Regression line:");
        System.out.printf("Y = %.3f + %.3f*X", a, b);
    }

    /** Driver code: fits and prints the line for a small sample. */
    public static void main(String[] args)
    {
        // Statistical data; X[i] is paired with Y[i]
        int X[] = { 95, 85, 80, 70, 60 };
        int Y[] = { 90, 80, 70, 65, 60 };
        leastRegLine(X, Y);
    }
}
Python3
# Python program to find the
# regression line
def calculateB(x, y, n):
    """Return the slope b of the least-squares line Y = a + b*X.

    Uses the first ``n`` paired observations ``(x[i], y[i])``:

        b = (n*sum(x*y) - sum(x)*sum(y)) / (n*sum(x*x) - sum(x)**2)

    Every sum is taken over the same ``n`` items, so passing an ``n``
    smaller than ``len(x)`` fits only the leading observations.

    Raises ZeroDivisionError when the denominator is zero (``n`` is 0
    or 1, or all ``x`` values are equal) and IndexError when ``n``
    exceeds the length of either sequence.
    """
    sum_x = 0    # sum of x[i]
    sum_y = 0    # sum of y[i]
    sum_xy = 0   # sum of x[i] * y[i]
    sum_xx = 0   # sum of x[i] * x[i]

    for i in range(n):
        xi, yi = x[i], y[i]
        sum_x += xi
        sum_y += yi
        sum_xy += xi * yi
        sum_xx += xi * xi

    return (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x * sum_x)
def leastRegLine(X,Y,n):
    """Print the least-squares regression line Y = a + b*X.

    The line is fitted to the first ``n`` paired observations
    ``(X[i], Y[i])``. The slope ``b`` comes from ``calculateB``; the
    intercept is ``a = mean(Y) - b * mean(X)``. Both coefficients are
    printed to three decimal places.
    """
    # Slope of the fitted line
    b = calculateB(X, Y, n)

    # The means must stay real-valued: truncating them with int()
    # shifts the intercept whenever a sum is not a multiple of n.
    mean_x = sum(X[i] for i in range(n)) / n
    mean_y = sum(Y[i] for i in range(n)) / n

    # Intercept of the fitted line
    a = mean_y - b * mean_x

    print("Regression line:")
    print("Y = %.3f + %.3f*X" % (a, b))
# Driver code
# Statistical data, written as (x, y) observations
samples = [(95, 90), (85, 80), (80, 70), (70, 65), (60, 60)]
X = [x_value for x_value, _ in samples]
Y = [y_value for _, y_value in samples]
n = len(X)
leastRegLine(X, Y, n)
# This code is contributed by avanitrachhadiya2155
C#
// C# program to find the
// regression line
using System;
using System.Linq;
class GFG
{
    /// <summary>
    /// Computes the slope b of the least-squares regression line
    /// Y = a + b*X:
    ///     b = (n*sum(x*y) - sum(x)*sum(y)) / (n*sum(x*x) - sum(x)^2)
    /// </summary>
    /// <remarks>
    /// Sums are accumulated as <c>long</c> so that products such as
    /// x[i] * x[i] cannot wrap around in 32-bit arithmetic. When all
    /// x values are equal the denominator is zero and the result is
    /// NaN or infinite.
    /// </remarks>
    /// <param name="x">The X values.</param>
    /// <param name="y">The Y values; y[i] is paired with x[i].</param>
    /// <returns>The slope of the fitted line.</returns>
    private static double calculateB(int[] x,
                                     int[] y)
    {
        int n = x.Length;

        long sumX = 0;   // sum of x[i]
        long sumY = 0;   // sum of y[i]
        long sumXY = 0;  // sum of x[i] * y[i]
        long sumXX = 0;  // sum of x[i] * x[i]

        for (int i = 0; i < n; i++)
        {
            sumX += x[i];
            sumY += y[i];
            // Widen before multiplying so the product is formed in 64 bits
            sumXY += (long)x[i] * y[i];
            sumXX += (long)x[i] * x[i];
        }

        return (double)(n * sumXY - sumX * sumY) /
               (n * sumXX - sumX * sumX);
    }

    /// <summary>
    /// Writes the least-squares regression line Y = a + b*X for the
    /// paired observations (X[i], Y[i]) to the console, with both
    /// coefficients shown to three decimal places.
    /// </summary>
    /// <param name="X">The X values.</param>
    /// <param name="Y">The Y values; must have the same length as X.</param>
    /// <exception cref="ArgumentNullException">
    /// Either array is null.
    /// </exception>
    /// <exception cref="ArgumentException">
    /// The arrays are empty or differ in length.
    /// </exception>
    public static void leastRegLine(int []X, int []Y)
    {
        if (X == null)
        {
            throw new ArgumentNullException("X");
        }
        if (Y == null)
        {
            throw new ArgumentNullException("Y");
        }
        if (X.Length == 0 || X.Length != Y.Length)
        {
            throw new ArgumentException(
                "X and Y must be non-empty and have the same length.");
        }

        // Slope of the fitted line
        double b = calculateB(X, Y);

        // The means must be real-valued: truncating them to int shifts
        // the intercept whenever a sum is not an exact multiple of n.
        double meanX = X.Average();
        double meanY = Y.Average();

        // Intercept of the fitted line
        double a = meanY - b * meanX;

        Console.WriteLine("Regression line:");
        Console.Write("Y = {0:F3} + {1:F3}*X", a, b);
    }

    /// <summary>
    /// Driver code: fits and prints the line for a small sample.
    /// </summary>
    public static void Main(String[] args)
    {
        // Statistical data; X[i] is paired with Y[i]
        int []X = { 95, 85, 80, 70, 60 };
        int []Y = { 90, 80, 70, 65, 60 };
        leastRegLine(X, Y);
    }
}
// This code is contributed by gauravrajput1
Regression line:
Y = 5.685 + 0.863*X