MapReduce Project Case Study 4: Sales Data Analysis of Passenger and Commercial Vehicles


    Project Overview

    1. Data overview

    • The dataset contains registration records of vehicle sales, divided into passenger vehicles and commercial vehicles
    • The data covers both sales-related fields and detailed vehicle specifications

    2. Data fields (the number after each name is its column index)

    • province 0, month 1, city 2, district/county 3, year 4, vehicle model 5, manufacturer 6, brand 7, vehicle type 8, ownership 9,
    • usage type 10, quantity 11, engine model 12, displacement 13, power 14, fuel type 15, vehicle length 16, vehicle width 17, vehicle height 18, cargo box length 19,
    • cargo box width 20, cargo box height 21, number of axles 22, wheelbase 23, front track 24, tire specification 25, number of tires 26, gross mass 27, curb weight 28, approved load mass 29,
    • approved passenger capacity 30, permitted towing mass 31, chassis manufacturer 32, chassis brand 33, chassis model 34, engine manufacturer 35, vehicle name 36, age 37, gender 38
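    • All of the mappers in this article address these fields by position after splitting a record on commas. As a quick reference (a hypothetical helper, not part of the original project), the indices used below could be collected into constants:
    public final class CarFields {
        // Column indices, following the field list above.
        public static final int PROVINCE = 0;
        public static final int MONTH = 1;
        public static final int CITY = 2;
        public static final int DISTRICT = 3;
        public static final int YEAR = 4;
        public static final int MODEL = 5;
        public static final int MANUFACTURER = 6;
        public static final int BRAND = 7;
        public static final int VEHICLE_TYPE = 8;
        public static final int OWNERSHIP = 9;
        public static final int USAGE_TYPE = 10;  // non-commercial vs. commercial use
        public static final int QUANTITY = 11;
        public static final int ENGINE_MODEL = 12;
        public static final int FUEL_TYPE = 15;
        public static final int AGE = 37;
        public static final int GENDER = 38;      // stored as 男性 / 女性 in the raw data
    
        private CarFields() {
        }
    
        // Split one comma-separated record; callers should still validate the array length.
        public static String[] parse(String line) {
            return line.trim().split(",");
        }
    }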

    3. Input data

    • The dataset is too large to reproduce here; search for it online.
      (screenshot of sample input data omitted)

    Requirements analysis

    Automotive industry market analysis

    1. Count and sales distribution of passenger vehicles (non-commercial use) vs. commercial vehicles (commercial use)

    • CountMap
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * 1.1 Count and sales distribution of passenger vehicles (non-commercial use)
     * and commercial vehicles (other usage types).
     */
    public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
        private Text text = new Text();
        private LongWritable longWritable = new LongWritable();
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            // usage type: index 10, quantity: index 11
            if (split != null && split.length > 11
                    && split[10] != null && !"".equals(split[10].trim())
                    && split[11] != null && !"".equals(split[11].trim())) {
                try {
                    text.set(split[10].trim());
                    longWritable.set(Long.parseLong(split[11]));
                    context.write(text, longWritable);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
    
    • CountCombine
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.logging.Logger;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:28
     */
    public class CountCombine extends Reducer<Text, LongWritable, Text, LongWritable> {
        private Logger logger = Logger.getLogger(CountCombine.class.getName());
    
        private LongWritable res = new LongWritable();
    
        public CountCombine() {
            logger.info("CountCombine的构造方法,是单例吗?");//是
        }
    
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            logger.info("CountCombine的setup执行时机");//开始一次
        }
    
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            logger.info("CountCombine的cleanup执行时机");//结束一次
        }
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            res.set(sum);
            logger.info("combine合并:" + key.toString() + ":" + res.get());
            context.write(key, res);
        }
    }
    
    • CountReduce
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;
    import java.util.logging.Logger;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:34
     */
    public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
        private Logger logger = Logger.getLogger(CountReduce.class.getName());
    
        Map<String, Long> map = new HashMap<>();
        double all = 0; // grand total across all keys (global percentages assume a single reduce task)
    
        public CountReduce() {
            logger.info("CountReduce的构造方法,是单例吗?");
        }
    
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            logger.info("CountReduce的setup执行时机");
        }
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable val : values) {
                sum += val.get();
            }
            all += sum;
            map.put(key.toString(), sum);
            logger.info("CountReduce的reduce:" + key.toString() + ":" + sum);
        }
    
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            logger.info("CountReduce的cleanup执行时机");
            Set<String> keySet = map.keySet();
            for (String key : keySet) {
                long value = map.get(key);
                double percent = value / all;
                logger.info("CountReduce的cleanup:" + key.toString() + ":" + value + "\t" + percent);
                context.write(new Text(key), new Text(value + "\t" + percent));
            }
        }
    }
    
    • App1
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:45
     */
    public class App1 {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Path input = new Path("E:\\HadoopMRData\\input");
            Path output = new Path("E:\\HadoopMRData\\output");
            if (args != null && args.length == 2) {
                input = new Path(args[0]);
                output = new Path(args[1]);
            }
            Configuration conf = new Configuration();
    
            //conf.set("fs.defaultFS","hdfs://node1:8020");
            /*FileSystem fs = FileSystem.get(conf);
            if (fs.exists(output)) {
                fs.delete(output, true);
            }*/
            Job job = Job.getInstance(conf, "通过统计乘用车辆(非营运)和商用车辆(其他)的数量和销售额分布");
            job.setJarByClass(App1.class);
    
            job.setMapperClass(CountMap.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
    
            job.setCombinerClass(CountCombine.class);
            job.setReducerClass(CountReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            //job.setNumReduceTasks(2);
    
            FileInputFormat.addInputPath(job, input);
            FileOutputFormat.setOutputPath(job, output);
    
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    

    2. Monthly share of vehicle sales in Shanxi Province in 2013, sorted by month

    • Output format: month  count  proportion
    • CountMap
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class CountMap extends Mapper<LongWritable, Text, IntWritable, LongWritable> {
        private IntWritable intWritable = new IntWritable();
        private LongWritable longWritable = new LongWritable();
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            // month: index 1, quantity: index 11
            if (split != null && split.length > 11 && split[11] != null && !"".equals(split[11].trim())) {
                try {
                    intWritable.set(Integer.parseInt(split[1]));
                    longWritable.set(Long.parseLong(split[11]));
                    context.write(intWritable, longWritable);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }
    
    • CountCombine
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:28
     */
    public class CountCombine extends Reducer<IntWritable, LongWritable, IntWritable, LongWritable> {
        private LongWritable res = new LongWritable();
    
        @Override
        protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            res.set(sum);
            context.write(key, res);
        }
    }
    
    • CountReduce
    import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.Map;
    import java.util.Set;
    import java.util.TreeMap;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:34
     */
    public class CountReduce extends Reducer<IntWritable, LongWritable, IntWritable, Text> {
        private Map<Integer, Long> map = new TreeMap<Integer, Long>(); // TreeMap keeps months in ascending order (output must be sorted by month)
        private Long all = 0L; // total sales across all months
        private DoubleWritable doubleWritable = new DoubleWritable(); // proportion
    
        @Override
        protected void reduce(IntWritable key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            Long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            all += sum;
            map.put(key.get(), sum);
        }
    
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            Set<Integer> keySet = map.keySet();
            for (Integer key : keySet) {
                Long value = map.get(key);
                double percent = value / (double) all;
                doubleWritable.set(percent);
                context.write(new IntWritable(key), new Text(value + "\t" + doubleWritable));
            }
        }
    }
    
    • App2
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:45
     */
    public class App2 {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Path input = new Path("E:\\HadoopMRData\\input");
            Path output = new Path("E:\\HadoopMRData\\output");
            if (args != null && args.length == 2) {
                input = new Path(args[0]);
                output = new Path(args[1]);
            }
            Configuration conf = new Configuration();
    
            //conf.set("fs.defaultFS","hdfs://node1:8020");
            /*FileSystem fs = FileSystem.get(conf);
            if (fs.exists(output)) {
                fs.delete(output, true);
            }*/
            Job job = Job.getInstance(conf, "统计山西省2013年每个月的汽车销售数量的比例,按月份排序");
            job.setJarByClass(App2.class);
    
            job.setMapperClass(CountMap.class);
            job.setMapOutputKeyClass(IntWritable.class);
            job.setMapOutputValueClass(LongWritable.class);
    
            job.setCombinerClass(CountCombine.class);
            job.setReducerClass(CountReduce.class);
            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(Text.class);
    
            //job.setNumReduceTasks(2);
    
            FileInputFormat.addInputPath(job, input);
            FileOutputFormat.setOutputPath(job, output);
    
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    

    3. Share of vehicle sales by city and district/county in Anhui Province in April 2014

    • Not implemented: the dataset contains no Anhui Province records.
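    • For reference only, a minimal mapper sketch for this requirement, assuming the input did contain Anhui records, that province names appear literally (e.g. "安徽") in column 0, and that year and month are stored as plain numbers; the Text/LongWritable summing and percentage reducer pattern used elsewhere in this article would then compute the per-city/county shares:
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    // Hypothetical mapper: filters on province (0), year (4) and month (1),
    // then keys by city (2) + district/county (3) and emits the quantity (11).
    public class AnhuiAprilCountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
        private final Text outKey = new Text();
        private final LongWritable outValue = new LongWritable();
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            if (split.length > 11 && "安徽".equals(split[0].trim())
                    && "2014".equals(split[4].trim()) && "4".equals(split[1].trim())
                    && !split[11].trim().isEmpty()) {
                try {
                    outKey.set(split[2].trim() + "\t" + split[3].trim());
                    outValue.set(Long.parseLong(split[11].trim()));
                    context.write(outKey, outValue);
                } catch (NumberFormatException e) {
                    // skip records with a malformed quantity
                }
            }
        }
    }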

    User data market analysis

    1. Male-to-female ratio of car buyers, and color preference by gender

    • The dataset has no color column, so only the gender ratio is computed.
    • CountMap
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    
    public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override // the number of map tasks equals the number of input splits; each task runs setup -> map (per record) -> cleanup
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            // gender: index 38; the raw data stores it as "男性" (male) / "女性" (female)
            if (split != null && split.length > 38 && split[38] != null) {
                if ("男性".equals(split[38]) || "女性".equals(split[38])) {
                    context.write(new Text(split[38]), new LongWritable(1));
                }
            }
        }
    }
    
    • CountCombine
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:28
     */
    public class CountCombine extends Reducer<Text, LongWritable, Text, LongWritable> {
        private LongWritable res = new LongWritable();
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            res.set(sum);
            context.write(key, res);
        }
    }
    
    • CountReduce
    import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:34
     */
    public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
        private Map<String, Long> map = new HashMap<String, Long>();
        private long all = 0L; // overall total across all keys (used to compute proportions)
        private DoubleWritable doubleWritable = new DoubleWritable(); // proportion
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            all += sum;
            map.put(key.toString(), sum);
        }
    
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            Set<String> keySet = map.keySet();
            for (String key : keySet) {
                long value = map.get(key);
                double percent = value / (double) all;
                doubleWritable.set(percent);
                context.write(new Text(key), new Text(value + "\t" + doubleWritable));
            }
        }
    }
    
    • App3
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:45
     */
    public class App3 {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Path input = new Path("E:\\HadoopMRData\\input");
            Path output = new Path("E:\\HadoopMRData\\output");
            if (args != null && args.length == 2) {
                input = new Path(args[0]);
                output = new Path(args[1]);
            }
            Configuration conf = new Configuration();
    
            //conf.set("fs.defaultFS","hdfs://node1:8020");
            /*FileSystem fs = FileSystem.get(conf);
            if (fs.exists(output)) {
                fs.delete(output, true);
            }*/
            Job job = Job.getInstance(conf, "统计买车的男女比例及男女对车的颜色的选择");
            job.setJarByClass(App3.class);
    
            job.setMapperClass(CountMap.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
    
            job.setCombinerClass(CountCombine.class);
            job.setReducerClass(CountReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            //job.setNumReduceTasks(2);
    
            FileInputFormat.addInputPath(job, input);
            FileOutputFormat.setOutputPath(job, output);
    
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    

    2. Vehicle sales count and proportion by ownership, model, and type

    • CountMap
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    
    public class CountMap extends Mapper<LongWritable, Text, Text, LongWritable> {
        @Override // the number of map tasks equals the number of input splits; each task runs setup -> map (per record) -> cleanup
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            // composite key from indices 10, 6, 9 (ownership / model / type per the original comment; note the field list above puts ownership at 9 and usage type at 10)
            if (split != null && split.length > 10 && split[10] != null && split[6] != null && split[9] != null) {
                if (!"".equals(split[10]) && !"".equals(split[6]) && !"".equals(split[9])) {
                    context.write(new Text(split[10] + "\t" + split[6] + "\t" + split[9]), new LongWritable(1));
                }
            }
        }
    }
    
    • CountReduce
    import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Set;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:34
     */
    public class CountReduce extends Reducer<Text, LongWritable, Text, Text> {
        private Map<String, Long> map = new HashMap<String, Long>();
        private long all = 0L; // overall total across all keys (used to compute proportions)
        private DoubleWritable doubleWritable = new DoubleWritable(); // proportion
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable val : values) {
                sum += val.get();
            }
            all += sum;
            map.put(key.toString(), sum);
        }
    
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            Set<String> keySet = map.keySet();
            for (String key : keySet) {
                long value = map.get(key);
                double percent = value / (double) all;
                doubleWritable.set(percent);
                context.write(new Text(key), new Text(value + "\t" + doubleWritable));
            }
        }
    }
    
    • App4
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * @program: Hadoop_MR
     * @description:
     * @author: author
     * @create: 2022-06-21 23:45
     */
    public class App4 {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Path input = new Path("E:\\HadoopMRData\\input");
            Path output = new Path("E:\\HadoopMRData\\output");
            if (args != null && args.length == 2) {
                input = new Path(args[0]);
                output = new Path(args[1]);
            }
            Configuration conf = new Configuration();
    
            //conf.set("fs.defaultFS","hdfs://node1:8020");
            /*FileSystem fs = FileSystem.get(conf);
            if (fs.exists(output)) {
                fs.delete(output, true);
            }*/
            Job job = Job.getInstance(conf, "统计的车的所有权、型号和类型");
            job.setJarByClass(App4.class);
    
            job.setMapperClass(CountMap.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
    
        // job.setGroupingComparatorClass(Count10Group.class); // Count10Group is not defined in this article, so the call is disabled to keep the code compilable
    
            job.setReducerClass(CountReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
    
            FileInputFormat.addInputPath(job, input);
            FileOutputFormat.setOutputPath(job, output);
    
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
    

    3. Age and gender statistics of buyers for each vehicle type
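    • The original article gives no implementation for this requirement. A minimal sketch (hypothetical class name), keyed on vehicle type + age + gender so that the Text/LongWritable counting and percentage reducer pattern shown above can sum and compute proportions:
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    // Hypothetical mapper: key = vehicle type + age + gender, value = 1.
    public class TypeAgeGenderMap extends Mapper<LongWritable, Text, Text, LongWritable> {
        private final Text outKey = new Text();
        private static final LongWritable ONE = new LongWritable(1);
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            // vehicle type: index 8, age: index 37, gender: index 38 (per the field list above)
            if (split.length > 38 && !split[8].trim().isEmpty()
                    && !split[37].trim().isEmpty() && !split[38].trim().isEmpty()) {
                outKey.set(split[8].trim() + "\t" + split[37].trim() + "\t" + split[38].trim());
                context.write(outKey, ONE);
            }
        }
    }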

    Sales analysis by vehicle type

    1. Total sales of each vehicle type within a month (or over a period, e.g. per month or per year)
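    • This requirement is also left without code in the original article. A minimal mapper sketch (hypothetical class name), keyed on vehicle type + month and summing the quantity column; the LongWritable-summing combiner/reducer pattern used above would complete the job:
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    // Hypothetical mapper: key = vehicle type + month, value = quantity sold in that record.
    public class TypeMonthSalesMap extends Mapper<LongWritable, Text, Text, LongWritable> {
        private final Text outKey = new Text();
        private final LongWritable outValue = new LongWritable();
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            String[] split = value.toString().trim().split(",");
            // vehicle type: index 8, month: index 1, quantity: index 11
            if (split.length > 11 && !split[8].trim().isEmpty()
                    && !split[1].trim().isEmpty() && !split[11].trim().isEmpty()) {
                try {
                    outKey.set(split[8].trim() + "\t" + split[1].trim());
                    outValue.set(Long.parseLong(split[11].trim()));
                    context.write(outKey, outValue);
                } catch (NumberFormatException e) {
                    // skip records with a malformed quantity
                }
            }
        }
    }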

    2. Engine model and fuel type statistics, broken down by vehicle type (brand) sales

    3. Sales volume of vehicles with the same price but different types (brands)

    Competitive analysis of a specific brand

    1. Yearly (and monthly) sales volume and growth rate (trend) for FAW-Volkswagen
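    • No implementation is given for the brand-level requirements. As a small illustration of the growth-rate arithmetic only (a hypothetical helper that assumes the monthly or yearly totals have already been produced, e.g. by a month-keyed job like App2): growth(t) = (sales(t) - sales(t-1)) / sales(t-1).
    import java.util.Map;
    import java.util.TreeMap;
    
    // Hypothetical helper: period-over-period growth rates from aggregated totals.
    public class GrowthRate {
        public static Map<Integer, Double> periodOverPeriod(TreeMap<Integer, Long> totals) {
            Map<Integer, Double> growth = new TreeMap<>();
            Map.Entry<Integer, Long> prev = null;
            for (Map.Entry<Integer, Long> cur : totals.entrySet()) {
                if (prev != null && prev.getValue() != 0) {
                    growth.put(cur.getKey(), (cur.getValue() - prev.getValue()) / (double) prev.getValue());
                }
                prev = cur;
            }
            return growth;
        }
    }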

    2. Differences in FAW-Volkswagen sales volume and prices between Shanxi and Anhui
