
huangapple 未分类评论46阅读模式

Java: How to sum all the values of one column based on the criteria in a second column using HashMaps





/**
 * Loads flight rows from {@code flights.csv} and reports the tail number whose
 * flights add up to the greatest total distance.
 *
 * <p>Each row is a comma-separated record; the column positions used here are
 * named by the constants below.
 */
public class FlightData {
    /** Zero-based index of the flight date column (part of the row key). */
    private static final int FLIGHT_DATE_COLUMN = 2;
    /** Zero-based index of the tail number column. */
    private static final int TAIL_NUMBER_COLUMN = 4;
    /** Zero-based index of the departure time column (part of the row key). */
    private static final int DEP_TIME_COLUMN = 11;
    /** Zero-based index of the distance column. */
    private static final int DISTANCE_COLUMN = 21;

    /** Flight rows keyed by {@code tailNumber_flightDate_depTime}; assigned in {@link #main}. */
    HashMap<String,String[]> dataMap;

    /**
     * Reads the flight file, totals the distance per tail number and prints the
     * tail number with the largest total together with that total.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FlightData map1 = new FlightData();
        map1.dataMap = map1.createHashMap();

        Map<String, Integer> planeMileages = sumDistanceByTailNumber(map1.dataMap.values());
        if (planeMileages.isEmpty()) {
            // Collections.max throws NoSuchElementException on an empty collection,
            // which happens when the file is missing or holds no usable rows.
            System.out.println("No flight distances found");
            return;
        }

        String maxKey = Collections.max(planeMileages.entrySet(), Map.Entry.comparingByValue()).getKey();
        System.out.println(maxKey + " " + planeMileages.get(maxKey));
    }

    /**
     * Sums the distance column per tail number.
     *
     * <p>Rows that are too short to contain a distance, or whose distance is not
     * an integer (for example a header line), are skipped instead of aborting
     * the whole run.
     *
     * @param rows flight rows, one {@code String[]} of column values per flight
     * @return a new map from tail number to total distance
     */
    static Map<String, Integer> sumDistanceByTailNumber(Iterable<String[]> rows) {
        Map<String, Integer> totals = new HashMap<>();
        for (String[] row : rows) {
            if (row.length <= DISTANCE_COLUMN) {
                continue;
            }
            int distance;
            try {
                distance = Integer.parseInt(row[DISTANCE_COLUMN].trim());
            } catch (NumberFormatException ignored) {
                // Not a data row (e.g. the column header); nothing to add.
                continue;
            }
            // merge() inserts the first distance or adds to the running total.
            totals.merge(row[TAIL_NUMBER_COLUMN], distance, Integer::sum);
        }
        return totals;
    }

    /**
     * Reads {@code flights.csv} from the working directory into a map.
     *
     * <p>NOTE(review): rows that share the same tail number, flight date and
     * departure time produce the same key, so a later row replaces an earlier
     * one and only one of them is counted afterwards.
     *
     * @return the rows keyed by {@code tailNumber_flightDate_depTime}; empty when
     *         the file cannot be opened (a message is printed in that case)
     */
    public HashMap<String,String[]> createHashMap() {
        File flightFile = new File("flights.csv");
        HashMap<String,String[]> flightsMap = new HashMap<>();
        // try-with-resources closes the Scanner (and the file) on every path.
        try (Scanner s = new Scanner(flightFile)) {
            while (s.hasNextLine()) {
                String[] piecesOfInfo = s.nextLine().split(",");
                if (piecesOfInfo.length <= DEP_TIME_COLUMN) {
                    // Blank or truncated line: the key columns are missing.
                    continue;
                }
                String flightKey = piecesOfInfo[TAIL_NUMBER_COLUMN] + "_"
                        + piecesOfInfo[FLIGHT_DATE_COLUMN] + "_"
                        + piecesOfInfo[DEP_TIME_COLUMN];
                flightsMap.put(flightKey, piecesOfInfo);
            }
        } catch (FileNotFoundException e) {
            System.out.println("Cannot open: " + flightFile);
        }
        return flightsMap;
    }
}


DayofMonth	DayOfWeek	FlightDate	UniqueCarrier	TailNum	OriginAirportID	Origin	OriginStateName	DestAirportID	Dest	DestStateName	DepTime	DepDelay	WheelsOff	WheelsOn	ArrTime	ArrDelay	Cancelled	CancellationCode	Diverted	AirTime	Distance
3	1	10/3/2016	AA	N786AA	10721	BOS	Massachusetts	12478	JFK	New York	556	-4	623	703	709	-6	0		0	40	187
4	2	10/4/2016	AA	N794AA	10721	BOS	Massachusetts	12478	JFK	New York	554	-6	615	703	712	-3	0		0	48	187
1	6	10/1/2016	AA	N783AA	12478	JFK	New York	12892	LAX	California	823	-7	844	1104	1111	-30	0		0	320	2475
2	7	10/2/2016	AA	N798AA	12478	JFK	New York	12892	LAX	California	847	17	904	1131	1159	18	0		0	327	2475
3	1	10/3/2016	AA	N786AA	12478	JFK	New York	12892	LAX	California	825	-5	838	1109	1131	-10	0		0	331	2475
4	2	10/4/2016	AA	N794AA	12478	JFK	New York	12892	LAX	California	826	-4	848	1114	1132	-9	0		0	326	2475

I have a CSV file that contains roughly 500,000 rows and 22 columns of flight data. The 5th column contains the tail number of the plane for each flight. The 22nd column contains the distance traveled on each flight. I'm attempting to sum the total distance traveled (column 22) for each tail number (column 5).

I created a HashMap containing all the data, named map1. I created a second HashMap named planeMileages to hold each tail number and its total distance traveled. I'm using an if/else statement inside a loop to go through each row of map1 and check whether the tail number is already contained in planeMileages. If it is in planeMileages, then I want to add the flight's distance to the accumulatedMileage for that key. If it is not contained, I'd like to insert the key along with its first distance value.

The current code that I've written seems sound to me, but it is producing the wrong result, outputting the incorrect tail number. Can you please take a look and let me know what I am overlooking in my main method? Thanks!

/**
 * Loads flight rows from {@code flights.csv} and reports the tail number whose
 * flights add up to the greatest total distance.
 *
 * <p>Each row is a comma-separated record; the column positions used here are
 * named by the constants below.
 */
public class FlightData {
    /** Zero-based index of the flight date column (part of the row key). */
    private static final int FLIGHT_DATE_COLUMN = 2;
    /** Zero-based index of the tail number column. */
    private static final int TAIL_NUMBER_COLUMN = 4;
    /** Zero-based index of the departure time column (part of the row key). */
    private static final int DEP_TIME_COLUMN = 11;
    /** Zero-based index of the distance column. */
    private static final int DISTANCE_COLUMN = 21;

    /** Flight rows keyed by {@code tailNumber_flightDate_depTime}; assigned in {@link #main}. */
    HashMap<String,String[]> dataMap;

    /**
     * Reads the flight file, totals the distance per tail number and prints the
     * tail number with the largest total together with that total.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FlightData map1 = new FlightData();
        map1.dataMap = map1.createHashMap();

        Map<String, Integer> planeMileages = sumDistanceByTailNumber(map1.dataMap.values());
        if (planeMileages.isEmpty()) {
            // Collections.max throws NoSuchElementException on an empty collection,
            // which happens when the file is missing or holds no usable rows.
            System.out.println("No flight distances found");
            return;
        }

        String maxKey = Collections.max(planeMileages.entrySet(), Map.Entry.comparingByValue()).getKey();
        System.out.println(maxKey + " " + planeMileages.get(maxKey));
    }

    /**
     * Sums the distance column per tail number.
     *
     * <p>Rows that are too short to contain a distance, or whose distance is not
     * an integer (for example a header line), are skipped instead of aborting
     * the whole run.
     *
     * @param rows flight rows, one {@code String[]} of column values per flight
     * @return a new map from tail number to total distance
     */
    static Map<String, Integer> sumDistanceByTailNumber(Iterable<String[]> rows) {
        Map<String, Integer> totals = new HashMap<>();
        for (String[] row : rows) {
            if (row.length <= DISTANCE_COLUMN) {
                continue;
            }
            int distance;
            try {
                distance = Integer.parseInt(row[DISTANCE_COLUMN].trim());
            } catch (NumberFormatException ignored) {
                // Not a data row (e.g. the column header); nothing to add.
                continue;
            }
            // merge() inserts the first distance or adds to the running total.
            totals.merge(row[TAIL_NUMBER_COLUMN], distance, Integer::sum);
        }
        return totals;
    }

    /**
     * Reads {@code flights.csv} from the working directory into a map.
     *
     * <p>NOTE(review): rows that share the same tail number, flight date and
     * departure time produce the same key, so a later row replaces an earlier
     * one and only one of them is counted afterwards.
     *
     * @return the rows keyed by {@code tailNumber_flightDate_depTime}; empty when
     *         the file cannot be opened (a message is printed in that case)
     */
    public HashMap<String,String[]> createHashMap() {
        File flightFile = new File("flights.csv");
        HashMap<String,String[]> flightsMap = new HashMap<>();
        // try-with-resources closes the Scanner (and the file) on every path.
        try (Scanner s = new Scanner(flightFile)) {
            while (s.hasNextLine()) {
                String[] piecesOfInfo = s.nextLine().split(",");
                if (piecesOfInfo.length <= DEP_TIME_COLUMN) {
                    // Blank or truncated line: the key columns are missing.
                    continue;
                }
                String flightKey = piecesOfInfo[TAIL_NUMBER_COLUMN] + "_"
                        + piecesOfInfo[FLIGHT_DATE_COLUMN] + "_"
                        + piecesOfInfo[DEP_TIME_COLUMN];
                flightsMap.put(flightKey, piecesOfInfo);
            }
        } catch (FileNotFoundException e) {
            System.out.println("Cannot open: " + flightFile);
        }
        return flightsMap;
    }
}

Please see a few lines of my CSV file below:


DayofMonth	DayOfWeek	FlightDate	UniqueCarrier	TailNum	OriginAirportID	Origin	OriginStateName	DestAirportID	Dest	DestStateName	DepTime	DepDelay	WheelsOff	WheelsOn	ArrTime	ArrDelay	Cancelled	CancellationCode	Diverted	AirTime	Distance
3	1	10/3/2016	AA	N786AA	10721	BOS	Massachusetts	12478	JFK	New York	556	-4	623	703	709	-6	0		0	40	187
4	2	10/4/2016	AA	N794AA	10721	BOS	Massachusetts	12478	JFK	New York	554	-6	615	703	712	-3	0		0	48	187
1	6	10/1/2016	AA	N783AA	12478	JFK	New York	12892	LAX	California	823	-7	844	1104	1111	-30	0		0	320	2475
2	7	10/2/2016	AA	N798AA	12478	JFK	New York	12892	LAX	California	847	17	904	1131	1159	18	0		0	327	2475
3	1	10/3/2016	AA	N786AA	12478	JFK	New York	12892	LAX	California	825	-5	838	1109	1131	-10	0		0	331	2475
4	2	10/4/2016	AA	N794AA	12478	JFK	New York	12892	LAX	California	826	-4	848	1114	1132	-9	0		0	326	2475


得分: 3


你可以扩展 HashMap 并添加两个新方法,一个用于添加航班,另一个用于计算总距离。
这样一来,你就不需要不断地将修改后的值从 HashMap 中移除和添加回去。

import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;

/**
 * Entry point: loads flights from {@code flights.csv} and prints the total
 * distance flown by one tail number.
 */
public class Main {

    /**
     * Loads the flight data and prints the total distance for the placeholder
     * tail number {@code "tail number"}; replace it with a real one.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FlightData flightData = getFlightDataFromFile();
        System.out.println(flightData.getDistanceTraveled("tail number"));
    }

    /**
     * Reads {@code flights.csv} from the working directory and groups its
     * flights by tail number.
     *
     * <p>Lines that are too short to hold a distance, or whose distance is not
     * an integer (for example a header line), are skipped.
     *
     * @return the flights grouped by tail number; empty when the file cannot be
     *         opened (a message is printed in that case)
     */
    public static FlightData getFlightDataFromFile() {
        File flightFile = new File("flights.csv");
        FlightData flightData = new FlightData();

        // try-with-resources closes the Scanner (and the file) on every path.
        try (Scanner s = new Scanner(flightFile)) {
            while (s.hasNextLine()) {
                String[] piecesOfInfo = s.nextLine().split(",");
                if (piecesOfInfo.length <= 21) {
                    continue;
                }
                int distance;
                try {
                    distance = Integer.parseInt(piecesOfInfo[21]);
                } catch (NumberFormatException ignored) {
                    // Not a data row (e.g. the column header); skip it.
                    continue;
                }
                String tailNr = piecesOfInfo[4];
                Flight flight = new Flight(piecesOfInfo[6], piecesOfInfo[9], distance);
                flightData.addFlight(tailNr, flight);
            }
        } catch (FileNotFoundException e) {
            System.out.println("Cannot open: " + flightFile);
        }
        return flightData;
    }
}

/** Flights grouped by tail number. */
class FlightData extends HashMap<String, List<Flight>> {

    /**
     * Appends a flight to the list kept for the given tail number, creating the
     * list on first use.
     *
     * @param tailNr tail number the flight belongs to
     * @param flight flight to record
     */
    void addFlight(String tailNr, Flight flight) {
        computeIfAbsent(tailNr, key -> new ArrayList<>()).add(flight);
    }

    /**
     * Sums the distances of all flights recorded for a tail number.
     *
     * @param tailNr tail number to look up
     * @return the total distance, or 0 when no flight is recorded for it
     */
    int getDistanceTraveled(String tailNr) {
        List<Flight> flights = get(tailNr);
        if (flights == null) {
            // Unknown tail number: iterating over null would throw.
            return 0;
        }
        int distance = 0;
        for (Flight f : flights) {
            distance += f.distance;
        }
        return distance;
    }
}

/** A single flight: where it started, where it ended and how far it went. */
class Flight {
    String from;
    String to;
    int distance;

    /**
     * @param from     origin of the flight
     * @param to       destination of the flight
     * @param distance distance flown
     */
    public Flight(String from, String to, int distance) {
        this.from = from;
        this.to = to;
        this.distance = distance;
    }
}

Here is a somewhat more object-oriented way of doing it.

You extend the HashMap and add two new methods, one for adding flights and another for calculating total distance.
This way you are not constantly removing and adding back modified values into the HashMap.
You can expand on this to fit your needs.

import java.util.*;

/**
 * Entry point: loads flights from {@code flights.csv} and prints the total
 * distance flown by one tail number.
 */
public class Main {

    /**
     * Loads the flight data and prints the total distance for the placeholder
     * tail number {@code "tail number"}; replace it with a real one.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        FlightData flightData = getFlightDataFromFile();
        System.out.println(flightData.getDistanceTraveled("tail number"));
    }

    /**
     * Reads {@code flights.csv} from the working directory and groups its
     * flights by tail number.
     *
     * <p>Lines that are too short to hold a distance, or whose distance is not
     * an integer (for example a header line), are skipped.
     *
     * @return the flights grouped by tail number; empty when the file cannot be
     *         opened (a message is printed in that case)
     */
    public static FlightData getFlightDataFromFile() {
        File flightFile = new File("flights.csv");
        FlightData flightData = new FlightData();

        // try-with-resources closes the Scanner (and the file) on every path.
        try (Scanner s = new Scanner(flightFile)) {
            while (s.hasNextLine()) {
                String[] piecesOfInfo = s.nextLine().split(",");
                if (piecesOfInfo.length <= 21) {
                    continue;
                }
                int distance;
                try {
                    distance = Integer.parseInt(piecesOfInfo[21]);
                } catch (NumberFormatException ignored) {
                    // Not a data row (e.g. the column header); skip it.
                    continue;
                }
                String tailNr = piecesOfInfo[4];
                Flight flight = new Flight(piecesOfInfo[6], piecesOfInfo[9], distance);
                flightData.addFlight(tailNr, flight);
            }
        } catch (FileNotFoundException e) {
            System.out.println("Cannot open: " + flightFile);
        }
        return flightData;
    }
}

/** Flights grouped by tail number. */
class FlightData extends HashMap<String, List<Flight>> {

    /**
     * Appends a flight to the list kept for the given tail number, creating the
     * list on first use.
     *
     * @param tailNr tail number the flight belongs to
     * @param flight flight to record
     */
    void addFlight(String tailNr, Flight flight) {
        computeIfAbsent(tailNr, key -> new ArrayList<>()).add(flight);
    }

    /**
     * Sums the distances of all flights recorded for a tail number.
     *
     * @param tailNr tail number to look up
     * @return the total distance, or 0 when no flight is recorded for it
     */
    int getDistanceTraveled(String tailNr) {
        List<Flight> flights = get(tailNr);
        if (flights == null) {
            // Unknown tail number: iterating over null would throw.
            return 0;
        }
        int distance = 0;
        for (Flight f : flights) {
            distance += f.distance;
        }
        return distance;
    }
}

/** A single flight: where it started, where it ended and how far it went. */
class Flight {
    String from;
    String to;
    int distance;

    /**
     * @param from     origin of the flight
     * @param to       destination of the flight
     * @param distance distance flown
     */
    public Flight(String from, String to, int distance) {
        this.from = from;
        this.to = to;
        this.distance = distance;
    }
}


得分: 1

public static void main(String[] args) throws IOException {
    Map<String, String[]> map = createMap();
    Map<String, Long> planeMileages = map
            .collect(Collectors.groupingBy(o -> o.getValue()[4],
                            Collectors.summarizingInt(value ->
                                    Integer.parseInt(value.getValue()[21])), IntSummaryStatistics::getSum
    String maxKey = planeMileages.entrySet().stream().max(Comparator.comparing(Map.Entry::getValue)).get().getKey();
    System.out.println("max key: " + maxKey);

public static Map<String, String[]> createMap() throws IOException {
    try (BufferedReader a = Files.newBufferedReader(Paths.get("flights.csv"))) {
        return a.lines().map(s -> s.split(","))
                .collect(Collectors.toMap(piecesOfInfo -> String.join("_", piecesOfInfo[4], piecesOfInfo[2], piecesOfInfo[11]), Function.identity()));

public static Map<String, String[]> createMapLastDupWins() throws IOException {
    try (BufferedReader a = Files.newBufferedReader(Paths.get("flights.csv"))) {
        return a.lines().map(s -> s.split(","))
                .collect(Collectors.toMap(piecesOfInfo -> String.join("_", piecesOfInfo[4], piecesOfInfo[2], piecesOfInfo[11]), Function.identity(), (strings, strings2) -> {
                    //if this helps than data is duplicated
                    return strings2;

Hello, can you check this?

public static void main(String[] args) throws  IOException {

    Map&lt;String, String[]&gt; map = createMap();
    Map&lt;String, Long&gt; planeMileages = map
            .collect(Collectors.groupingBy(o -&gt; o.getValue()[4],
                            Collectors.summarizingInt(value -&gt;
                                    Integer.parseInt(value.getValue()[21])), IntSummaryStatistics::getSum
    String maxKey = planeMileages.entrySet().stream().max(Comparator.comparing(Map.Entry::getValue)).get().getKey();
    System.out.println(&quot;max key: &quot;+ maxKey);

public static Map&lt;String, String[]&gt; createMap() throws IOException {
    try (BufferedReader a = Files.newBufferedReader(Paths.get(&quot;flights.csv&quot;))) {
        return a.lines().map(s -&gt; s.split(&quot;,&quot;))
                .collect(Collectors.toMap(piecesOfInfo -&gt; String.join(&quot;_&quot;, piecesOfInfo[4], piecesOfInfo[2], piecesOfInfo[11]), Function.identity()));

public static Map&lt;String, String[]&gt; createMapLastDupWins() throws IOException {
    try (BufferedReader a = Files.newBufferedReader(Paths.get(&quot;flights.csv&quot;))) {
        return a.lines().map(s -&gt; s.split(&quot;,&quot;))
                .collect(Collectors.toMap(piecesOfInfo -&gt; String.join(&quot;_&quot;, piecesOfInfo[4], piecesOfInfo[2], piecesOfInfo[11]), Function.identity(), (strings, strings2) -&gt; {
                    //if this helps than data is duplicated
                    return strings2;


得分: 1


// Total distance (column index 21) per tail number (column index 4).
HashMap<String, Integer> planeMileages = new HashMap<>();

for (String[] value : flightsMap.values()) {
    // merge() stores the first distance for a tail number or adds to its running total.
    planeMileages.merge(value[4], Integer.valueOf(value[21]), Integer::sum);
}

Try this, and if the mileage totals are very large, change Integer to Long and then check again.

   HashMap&lt;String, Integer&gt; planeMileages = new HashMap&lt;&gt;();
	for (String [] value : flightsMap.values()) {
		if(planeMileages.containsKey(value[4])) {
			planeMileages.put(value[4], planeMileages.get(value[4])+Integer.valueOf(value[21]));
		} else {
			planeMileages.put(value[4], Integer.valueOf(value[21]));

  • 本文由 发表于 2020年3月16日 03:05:37
  • 转载请务必保留本文链接:



:?: :razz: :sad: :evil: :!: :smile: :oops: :grin: :eek: :shock: :???: :cool: :lol: :mad: :twisted: :roll: :wink: :idea: :arrow: :neutral: :cry: :mrgreen:
