• 【E-commerce Project: Question 3】



    1. Create the classes

    (screenshot of the created classes: LogETLMapper, LogETLReducer, and LogETLDriver)

    2. The code is as follows:

    LogETLMapper

    import com.bigdata.hadoop.project.utils.GetPageId;
    import com.bigdata.hadoop.project.utils.LogParser;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;
    
    import java.io.IOException;
    import java.util.Map;
    
    public class LogETLMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    
        private static final IntWritable one = new IntWritable(1);
        private Text outputKey = new Text();
        private LogParser logParser = new LogParser();
        private Logger logger = LoggerFactory.getLogger(LogETLMapper.class);
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Parse the raw log record
            Map<String, String> logInfo = logParser.parse(value.toString());
    
            if (logInfo == null) {
                logger.error("Malformed log record or parse failure: " + value.toString());
                return;
            }
    
            // Extract the fields we need
            String ip = logInfo.get("ip");
            String url = logInfo.get("url");
            String country = logInfo.get("country");
            String province = logInfo.get("province");
            String city = logInfo.get("city");
    
            // Use GetPageId to derive the topicId from the URL
            String topicId = GetPageId.getPageId(url);
            logInfo.put("pageId", topicId);
    
            // Emit the record only if at least one field is present; otherwise log and skip
            if (ip != null || url != null || topicId != null || country != null || province != null || city != null) {
                StringBuilder sb = new StringBuilder();
    
                if (ip != null && !ip.isEmpty()) sb.append("IP: ").append(ip).append(", ");
                if (url != null && !url.isEmpty()) sb.append("URL: ").append(url).append(", ");
                if (topicId != null && !topicId.isEmpty()) sb.append("PageId: ").append(topicId).append(", ");
                if (country != null && !country.isEmpty()) sb.append("Country: ").append(country).append(", ");
                if (province != null && !province.isEmpty()) sb.append("Province: ").append(province).append(", ");
                if (city != null && !city.isEmpty()) sb.append("City: ").append(city);
    
                // Trim the trailing ", " separator
                String outputString = sb.toString().replaceAll(", $", "");
                outputKey.set(outputString);
                context.write(outputKey, one);
            } else {
                logger.error("All fields are empty; log record: " + value.toString());
            }
        }
    }
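
    The Mapper above depends on two utility classes, GetPageId and LogParser, from the com.bigdata.hadoop.project.utils package that are not listed in this post. A minimal sketch of what they might look like is shown below; the tab-separated log layout and the numeric-id-before-".html" URL pattern are assumptions made purely for illustration, not the project's actual parsing rules.

    // ===== GetPageId.java (hypothetical sketch) =====
    package com.bigdata.hadoop.project.utils;

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class GetPageId {

        // Assumes topic URLs end in a numeric id, e.g. .../topic/123.html
        private static final Pattern PAGE_ID = Pattern.compile("/(\\d+)\\.html");

        public static String getPageId(String url) {
            if (url == null) return "";
            Matcher m = PAGE_ID.matcher(url);
            return m.find() ? m.group(1) : "";
        }
    }

    // ===== LogParser.java (hypothetical sketch) =====
    package com.bigdata.hadoop.project.utils;

    import java.util.HashMap;
    import java.util.Map;

    public class LogParser {

        // Assumes a tab-separated line: ip, url, country, province, city;
        // returns null for malformed records so the Mapper can skip them.
        public Map<String, String> parse(String line) {
            if (line == null) return null;
            String[] fields = line.split("\t");
            if (fields.length < 5) return null;
            Map<String, String> info = new HashMap<>();
            info.put("ip", fields[0]);
            info.put("url", fields[1]);
            info.put("country", fields[2]);
            info.put("province", fields[3]);
            info.put("city", fields[4]);
            return info;
        }
    }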
    
    
    
    

    LogETLReducer

    The code is as follows (example):

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class LogETLReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    
        private IntWritable result = new IntWritable();
    
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }
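
    This reducer is the standard summing pattern familiar from word count: it adds up the 1s emitted by the Mapper for each distinct output key. Because integer addition is associative and commutative, the driver below can safely register the same class as a combiner, which pre-aggregates counts on the map side and cuts down the data shuffled across the network.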
    
    

    LogETLDriver

    The code is as follows (example):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class LogETLDriver {
    
        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: LogETLDriver <input path> <output path>");
                System.exit(-1);
            }
    
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "Log ETL");
    
            job.setJarByClass(LogETLDriver.class);
            job.setMapperClass(LogETLMapper.class);
            job.setCombinerClass(LogETLReducer.class);
            job.setReducerClass(LogETLReducer.class);
    
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
    
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
    
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
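
    Two practical notes on the driver: the output directory passed as the second argument must not already exist in HDFS, otherwise FileOutputFormat rejects the job at submission time; and reusing LogETLReducer as the combiner works because its input and output types match the Mapper's output types, Text and IntWritable. Each line of the final result is the concatenated field string built by the Mapper, a tab, and the number of log records sharing those field values.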
    
    
    

    Package the JAR
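
    A typical workflow here (not shown in the original post, so the exact names are your own setup) is to build the project into a jar with mvn clean package or the IDE's artifact tooling, upload the raw logs to HDFS, and then submit the job with hadoop jar, passing the driver class followed by the input and output paths expected by LogETLDriver.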

    View the results
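
    The reducer writes its output to files named part-r-00000, part-r-00001, and so on under the output directory; inspecting them with hdfs dfs -cat (or through the HDFS web UI) should show one aggregated count per distinct field combination, as in the screenshot below.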

    (screenshot of the job output)
